Shami96 committed on
Commit
0781251
·
verified ·
1 Parent(s): ab686bc

Update updated_word.py

Browse files
Files changed (1) hide show
  1. updated_word.py +308 -32
updated_word.py CHANGED
@@ -16,6 +16,65 @@ BLACK = RGBColor(0, 0, 0)
16
  RED = RGBColor(0xFF, 0x00, 0x00)
17
 
18
  # ----------------------------- text helpers -----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  def _find_table_with_headers(doc: Document, must_have: list[str]) -> Optional[Table]:
20
  for t in doc.tables:
21
  if not t.rows:
@@ -367,9 +426,10 @@ def fill_mass_vehicle_table_preserve_headers(table: Table, arrays: Dict[str, Lis
367
  put(row, "trip", "Trip Records", i)
368
  put(row, "frs", "Fault Recording/ Reporting on Suspension System", i)
369
 
 
370
  def overwrite_summary_details_cells(doc: Document, section_name: str, section_dict: Dict[str, List[str]]) -> int:
371
  """For a Summary table (Maintenance/Mass/Fatigue), replace the entire DETAILS cell
372
- for each Std N row with the JSON text (written in black)."""
373
  # build desired texts
374
  desired: Dict[str, str] = { _std_key(k): join_value(v) for k, v in section_dict.items() }
375
 
@@ -404,7 +464,8 @@ def overwrite_summary_details_cells(doc: Document, section_name: str, section_di
404
  if not cand:
405
  continue
406
 
407
- _set_cell_text_black(row.cells[details_col], cand) # full overwrite, black
 
408
  updated += 1
409
  return updated
410
 
@@ -483,35 +544,201 @@ def _set_text_and_black(run, new_text: str):
483
  pass
484
 
485
  def update_business_summary_once(doc: Document, value) -> bool:
486
- """Replace only the red summary paragraph; keep 'Accreditation Number' and 'Expiry Date' lines."""
487
- loc = (find_label_cell(doc, "Nature of the Operators Business (Summary)")
488
- or find_label_cell(doc, "Nature of the Operators Business (Summary):"))
489
- if not loc:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
  return False
491
-
492
- t, r, c = loc
493
- cell = get_adjacent_value_cell(t, r, c)
494
- if not cell.paragraphs:
495
- cell.add_paragraph("")
496
-
497
- txt = join_value(value)
498
-
499
- # find paragraphs with any red runs (the placeholders for the summary)
500
- red_paras = [p for p in cell.paragraphs if any(is_red_run(run) for run in p.runs)]
501
-
502
- if red_paras:
503
- # write the summary into the first red paragraph (in black)
504
- _clear_para_and_write_black(red_paras[0], txt)
505
- # clear any extra red placeholders
506
- for p in red_paras[1:]:
507
- _clear_para_and_write_black(p, "")
508
  else:
509
- # no red placeholder found: just put the summary into the first paragraph, leave others
510
- _clear_para_and_write_black(cell.paragraphs[0], txt)
511
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
512
  return True
513
 
514
-
515
  def _nuke_cell_paragraphs(cell: _Cell):
516
  """Remove ALL paragraphs from a cell (true delete, not just emptying runs)."""
517
  for p in list(cell.paragraphs):
@@ -530,18 +757,69 @@ def _clear_para_and_write_black(paragraph, text: str):
530
  pass
531
 
532
  def _set_cell_text_black(cell, text: str):
533
- """Clear a table cell and insert black text."""
534
  # remove text from all runs in all paragraphs
535
  for p in cell.paragraphs:
536
  for r in p.runs:
537
  r.text = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
538
  p = cell.paragraphs[0] if cell.paragraphs else cell.add_paragraph()
539
- r = p.add_run(str(text or ""))
540
  r.font.color.rgb = BLACK
541
  try:
542
  r.font.color.theme_color = None
543
  except Exception:
544
  pass
 
 
 
 
 
 
 
 
 
 
545
 
546
  def nz(x: Optional[str]) -> str:
547
  return (x or "").strip()
@@ -1143,9 +1421,7 @@ def run(input_json: Path, template_docx: Path, output_docx: Path):
1143
  # 9) Nature of the Operators Business (Summary): write once (no duplicates)
1144
  biz = data.get("Nature of the Operators Business (Summary)", {})
1145
  if biz:
1146
- val = biz.get("Nature of the Operators Business (Summary):") or next(iter(biz.values()), "")
1147
- if val:
1148
- update_business_summary_once(doc, val)
1149
 
1150
  # 10) Summary tables: FULL OVERWRITE of DETAILS from JSON
1151
  mm_sum = data.get("Maintenance Management Summary", {})
 
16
  RED = RGBColor(0xFF, 0x00, 0x00)
17
 
18
  # ----------------------------- text helpers -----------------------------
# Used for the Management Summary tables only: one paragraph per sentence.
def _set_cell_text_black_with_line_breaks(cell, text: str):
    """Replace a table cell's content with black text, one sentence per paragraph.

    Every paragraph currently in ``cell`` is removed. ``text`` is converted
    with ``str`` and stripped, then split after each period that is followed
    by whitespace (the period stays with its sentence). Each sentence is
    written to its own new paragraph with zero space before and after.

    When ``text`` is empty or ``None`` the cell ends up with a single
    paragraph holding one empty black run.

    NOTE(review): the split is purely textual, so abbreviations such as
    "e.g. " or "No. 5" also start a new paragraph.
    """
    import re  # kept local, as in the original block

    def _add_black_run(paragraph, content: str) -> None:
        """Append ``content`` to ``paragraph`` as a run coloured BLACK."""
        run = paragraph.add_run(content)
        run.font.color.rgb = BLACK
        try:
            run.font.color.theme_color = None
        except Exception:
            # Best effort only: the explicit RGB value above is what matters.
            pass

    # True delete of the old content, not just blanking the runs.
    for stale in list(cell.paragraphs):
        stale._element.getparent().remove(stale._element)

    cleaned = str(text or "").strip()
    if not cleaned:
        _add_black_run(cell.add_paragraph(), "")
        return

    # Split on the whitespace that follows a period; the look-behind keeps
    # the period attached to the sentence it ends.
    pieces = (piece.strip() for piece in re.split(r"(?<=\.)\s+", cleaned))
    for sentence in pieces:
        if not sentence:
            continue
        paragraph = cell.add_paragraph()
        paragraph.paragraph_format.space_before = Pt(0)
        paragraph.paragraph_format.space_after = Pt(0)
        _add_black_run(paragraph, sentence)
78
  def _find_table_with_headers(doc: Document, must_have: list[str]) -> Optional[Table]:
79
  for t in doc.tables:
80
  if not t.rows:
 
426
  put(row, "trip", "Trip Records", i)
427
  put(row, "frs", "Fault Recording/ Reporting on Suspension System", i)
428
 
429
+ # Modified function for Management Summary tables only
430
  def overwrite_summary_details_cells(doc: Document, section_name: str, section_dict: Dict[str, List[str]]) -> int:
431
  """For a Summary table (Maintenance/Mass/Fatigue), replace the entire DETAILS cell
432
+ for each Std N row with the JSON text (written in black with line breaks after periods)."""
433
  # build desired texts
434
  desired: Dict[str, str] = { _std_key(k): join_value(v) for k, v in section_dict.items() }
435
 
 
464
  if not cand:
465
  continue
466
 
467
+ # Use the special function with line breaks for Management Summary tables
468
+ _set_cell_text_black_with_line_breaks(row.cells[details_col], cand)
469
  updated += 1
470
  return updated
471
 
 
544
  pass
545
 
def _biz_find_label_cell(doc):
    """Return ``(table, row_index, col_index)`` of the first cell whose run
    text contains both "nature of the operators business" and "summary"
    (case-insensitive, colons ignored), or ``None`` when no cell matches."""
    for table in doc.tables:
        for r_idx, row in enumerate(table.rows):
            for c_idx, cell in enumerate(row.cells):
                raw = "".join(run.text for para in cell.paragraphs for run in para.runs)
                normalized = raw.strip().lower().replace(":", "")
                if "nature of the operators business" in normalized and "summary" in normalized:
                    return table, r_idx, c_idx
    return None


def _biz_run_is_reddish(run) -> bool:
    """Return True when the run has an explicit RGB colour that is mostly red
    (red > 150, green < 100, blue < 100)."""
    try:
        rgb = run.font.color.rgb
    except AttributeError:
        return False
    if rgb is None:
        return False
    try:
        red, green, blue = rgb[0], rgb[1], rgb[2]
    except (TypeError, IndexError):
        return False
    return red > 150 and green < 100 and blue < 100


def _biz_join_text(raw) -> str:
    """Flatten a JSON value (scalar or list) into one stripped string."""
    if isinstance(raw, (list, tuple)):
        return " ".join(str(item) for item in raw if item).strip()
    return str(raw).strip() if raw else ""


def _biz_existing_sublabel(label: str, content: str) -> str:
    """Return the text that follows ``label`` on the same line of ``content``."""
    import re

    # Only spaces, tabs and colons may separate the label from its value.
    # Allowing newlines here would make an empty "Accreditation Number:" line
    # swallow the following "Expiry Date" line as its value.
    match = re.search(re.escape(label) + r"[ \t:]*([^\n\r]*)", content, re.IGNORECASE)
    return match.group(1).strip() if match else ""


def _biz_write_black_run(paragraph, content: str) -> None:
    """Append ``content`` to ``paragraph`` as a black run."""
    run = paragraph.add_run(content)
    run.font.color.rgb = BLACK
    try:
        run.font.color.theme_color = None
    except Exception:
        # Best effort only: the explicit RGB value above is what matters.
        pass


def update_business_summary_once(doc: Document, value) -> bool:
    """Write the "Nature of the Operators Business (Summary)" value cell.

    The label cell is located by scanning every table in ``doc``. The value
    cell is the first *different* cell to the right of the label, else the
    cell below it, else the label cell itself. The value cell is then cleared
    and rewritten in black: the summary text (one paragraph per line), an
    empty paragraph, then "Accreditation Number: ..." and "Expiry Date: ..."
    lines when values for them are available.

    Args:
        doc: The document to modify in place.
        value: Either a dict with the keys
            "Nature of the Operators Business (Summary)" (with or without a
            trailing colon), "Accreditation Number" and "Expiry Date", or a
            plain string / list holding only the summary text. List items
            are joined with single spaces.

    Returns:
        True when the cell was rewritten; False when the label was not found
        or there was nothing to write.

    Sub-label values missing from ``value`` fall back to whatever non-red
    text already follows those labels in the cell, so existing values are
    preserved for dict and non-dict input alike.
    """
    import logging

    log = logging.getLogger(__name__)

    found = _biz_find_label_cell(doc)
    if found is None:
        return False
    table, r_idx, c_idx = found

    # --- choose the value cell -------------------------------------------
    row_cells = table.rows[r_idx].cells
    label_cell = row_cells[c_idx]
    # python-docx repeats a horizontally merged cell once per spanned grid
    # column, so skip neighbours that are really the label cell again.
    value_cell = next(
        (c for c in row_cells[c_idx + 1:] if c._element is not label_cell._element),
        None,
    )
    if value_cell is None and r_idx + 1 < len(table.rows):
        cells_below = table.rows[r_idx + 1].cells
        if c_idx < len(cells_below):
            value_cell = cells_below[c_idx]
    if value_cell is None:
        value_cell = label_cell  # last resort: write into the label cell

    # --- read what is already there (red placeholder runs are ignored) ----
    kept_lines = [
        "".join(run.text for run in para.runs if not _biz_run_is_reddish(run))
        for para in value_cell.paragraphs
    ]
    existing_content = "\n".join(kept_lines).strip()
    existing_acc = _biz_existing_sublabel("Accreditation Number", existing_content)
    existing_exp = _biz_existing_sublabel("Expiry Date", existing_content)

    # --- read the new values ----------------------------------------------
    if isinstance(value, dict):
        summary_text = _biz_join_text(
            value.get("Nature of the Operators Business (Summary)")
            or value.get("Nature of the Operators Business (Summary):")
        )
        json_acc = _biz_join_text(value.get("Accreditation Number"))
        json_exp = _biz_join_text(value.get("Expiry Date"))
    else:
        summary_text = _biz_join_text(value)
        json_acc = json_exp = ""
        if not summary_text:
            return False

    final_acc = json_acc or existing_acc
    final_exp = json_exp or existing_exp
    log.debug(
        "business summary: summary=%r accreditation=%r expiry=%r",
        summary_text, final_acc, final_exp,
    )

    # --- assemble the lines to write --------------------------------------
    lines = summary_text.split("\n") if summary_text else []
    sublabels = []
    if final_acc:
        sublabels.append(f"Accreditation Number: {final_acc}")
    if final_exp:
        sublabels.append(f"Expiry Date: {final_exp}")
    if lines and sublabels:
        lines.append("")  # blank paragraph between summary and sub-labels
    lines.extend(sublabels)
    if not lines:
        return False

    # --- clear the cell down to one empty paragraph -----------------------
    paragraphs = value_cell.paragraphs
    if paragraphs:
        first_paragraph = paragraphs[0]
        for extra in paragraphs[1:]:
            extra._element.getparent().remove(extra._element)
    else:
        first_paragraph = value_cell.add_paragraph()
    for run in list(first_paragraph.runs):
        run._element.getparent().remove(run._element)

    # --- write one paragraph per line -------------------------------------
    for index, line in enumerate(lines):
        target = first_paragraph if index == 0 else value_cell.add_paragraph()
        content = line.strip()
        if content:
            _biz_write_black_run(target, content)
        # an empty line leaves the paragraph empty, which acts as spacing

    return True
741
 
 
742
  def _nuke_cell_paragraphs(cell: _Cell):
743
  """Remove ALL paragraphs from a cell (true delete, not just emptying runs)."""
744
  for p in list(cell.paragraphs):
 
757
  pass
758
 
def _set_cell_text_black(cell, text: str):
    """Blank a table cell and write ``text`` in black, one sentence per paragraph.

    The text of every existing run is cleared; paragraphs themselves are
    kept. ``text`` is converted with ``str`` and stripped, then split after
    each period that is followed by whitespace (the period stays with its
    sentence). Sentences are written in order into the cell's existing
    paragraphs, and new paragraphs are appended only when the existing ones
    run out. This keeps the sentences contiguous instead of leaving emptied
    paragraphs between the first sentence and the rest.

    When ``text`` is empty or ``None`` a single empty black run is added to
    the first paragraph (a paragraph is created if the cell has none).
    """
    import re  # kept local, as in the original block

    def _add_black_run(paragraph, content: str) -> None:
        """Append ``content`` to ``paragraph`` as a run coloured BLACK."""
        run = paragraph.add_run(content)
        run.font.color.rgb = BLACK
        try:
            run.font.color.theme_color = None
        except Exception:
            # Best effort only: the explicit RGB value above is what matters.
            pass

    # Blank every run but keep the paragraphs so they can be reused.
    for paragraph in cell.paragraphs:
        for run in paragraph.runs:
            run.text = ""

    cleaned = str(text or "").strip()
    if cleaned:
        # Split on the whitespace that follows a period; the look-behind
        # keeps the period attached to the sentence it ends.
        sentences = [s.strip() for s in re.split(r"(?<=\.)\s+", cleaned) if s.strip()]
    else:
        sentences = [""]

    reusable = list(cell.paragraphs)
    for index, sentence in enumerate(sentences):
        target = reusable[index] if index < len(reusable) else cell.add_paragraph()
        _add_black_run(target, sentence)
823
 
def nz(x: Optional[str]) -> str:
    """Return ``x`` without surrounding whitespace, or "" when ``x`` is falsy."""
    if not x:
        return ""
    return x.strip()
 
1421
  # 9) Nature of the Operators Business (Summary): write once (no duplicates)
1422
  biz = data.get("Nature of the Operators Business (Summary)", {})
1423
  if biz:
1424
+ update_business_summary_once(doc, biz) # Pass the entire dictionary
 
 
1425
 
1426
  # 10) Summary tables: FULL OVERWRITE of DETAILS from JSON
1427
  mm_sum = data.get("Maintenance Management Summary", {})