Shami96 committed on
Commit
c60ddb7
·
verified ·
1 Parent(s): f5393f7

Update updated_word.py

Browse files
Files changed (1) hide show
  1. updated_word.py +136 -32
updated_word.py CHANGED
@@ -570,13 +570,16 @@ def handle_attendance_list_table_enhanced(table, flat_json):
570
  return replacements_made
571
 
572
  def fix_management_summary_details_column(table, flat_json):
573
- """Preserve behavior but prefer scoped mgmt dicts."""
574
  replacements_made = 0
575
  print(f" 🎯 FIX: Management Summary DETAILS column processing")
 
 
576
  table_text = ""
577
  for row in table.rows[:3]:
578
  for cell in row.cells:
579
  table_text += get_clean_text(cell).lower() + " "
 
580
  mgmt_types = []
581
  if "mass management" in table_text or "mass" in table_text:
582
  mgmt_types.append("Mass Management Summary")
@@ -584,65 +587,125 @@ def fix_management_summary_details_column(table, flat_json):
584
  mgmt_types.append("Maintenance Management Summary")
585
  if "fatigue management" in table_text or "fatigue" in table_text:
586
  mgmt_types.append("Fatigue Management Summary")
 
 
587
  if not mgmt_types:
588
  if any("std 5" in get_clean_text(c).lower() for r in table.rows for c in r.cells):
589
  mgmt_types.append("Mass Management Summary")
 
590
  if not mgmt_types:
 
591
  return 0
 
592
  for mgmt_type in mgmt_types:
593
  print(f" βœ… Confirmed {mgmt_type} table processing")
594
- mgmt_data = flat_json.get(mgmt_type)
595
- if not isinstance(mgmt_data, dict):
 
 
 
 
 
 
 
 
596
  for key in flat_json.keys():
597
- if mgmt_type.split()[0].lower() in key.lower() and "summary" in key.lower():
598
- mgmt_data = flat_json.get(key)
 
 
 
599
  break
600
- if not isinstance(mgmt_data, dict):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
601
  print(f" ⚠️ No JSON management dict found for {mgmt_type}, skipping this type")
602
  continue
 
 
603
  for row_idx, row in enumerate(table.rows):
604
  if len(row.cells) >= 2:
605
  standard_cell = row.cells[0]
606
  details_cell = row.cells[1]
607
  standard_text = get_clean_text(standard_cell).strip().lower()
 
 
 
 
 
 
608
  if "std 5" in standard_text or "verification" in standard_text:
609
  if has_red_text(details_cell):
610
- std_val = None
611
- for candidate in ("Std 5. Verification", "Std 5 Verification", "Std 5", "Verification"):
612
- std_val = mgmt_data.get(candidate)
613
- if std_val is not None:
614
- break
615
- if std_val is None:
616
- for k, v in mgmt_data.items():
617
- if 'std 5' in k.lower() or 'verification' in k.lower():
618
- std_val = v
619
- break
620
- if std_val is not None:
621
  replacement_text = get_value_as_string(std_val, "Std 5. Verification")
622
  cell_replacements = replace_red_text_in_cell(details_cell, replacement_text)
623
  replacements_made += cell_replacements
624
  if cell_replacements:
625
  print(f" βœ… Replaced Std 5. Verification details for {mgmt_type}")
626
- if "std 6" in standard_text or "internal review" in standard_text:
 
627
  if has_red_text(details_cell):
628
- std_val = None
629
- for candidate in ("Std 6. Internal Review", "Std 6 Internal Review", "Std 6", "Internal Review"):
630
- std_val = mgmt_data.get(candidate)
631
- if std_val is not None:
632
- break
633
- if std_val is None:
634
- for k, v in mgmt_data.items():
635
- if 'std 6' in k.lower() or 'internal review' in k.lower():
636
- std_val = v
637
- break
638
- if std_val is not None:
639
  replacement_text = get_value_as_string(std_val, "Std 6. Internal Review")
640
  cell_replacements = replace_red_text_in_cell(details_cell, replacement_text)
641
  replacements_made += cell_replacements
642
  if cell_replacements:
643
  print(f" βœ… Replaced Std 6. Internal Review details for {mgmt_type}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
644
  return replacements_made
645
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
646
  # ============================================================================
647
  # Canonical operator declaration fixer — SAFER
648
  # ============================================================================
@@ -1282,13 +1345,40 @@ def process_paragraphs(document, flat_json):
1282
  return replacements_made
1283
 
1284
  def process_headings(document, flat_json):
 
 
 
1285
  replacements_made = 0
1286
  print(f"\nπŸ” Processing headings:")
1287
  paragraphs = document.paragraphs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1288
  for para_idx, paragraph in enumerate(paragraphs):
1289
  paragraph_text = paragraph.text.strip()
1290
  if not paragraph_text:
1291
  continue
 
1292
  matched_heading = None
1293
  for category, patterns in HEADING_PATTERNS.items():
1294
  for pattern in patterns:
@@ -1297,20 +1387,29 @@ def process_headings(document, flat_json):
1297
  break
1298
  if matched_heading:
1299
  break
 
1300
  if matched_heading:
1301
  print(f" πŸ“Œ Found heading at paragraph {para_idx + 1}: '{paragraph_text}'")
 
 
1302
  if has_red_text_in_paragraph(paragraph):
1303
  print(f" πŸ”΄ Found red text in heading itself")
1304
- heading_replacements = process_red_text_in_paragraph(paragraph, paragraph_text, flat_json)
1305
  replacements_made += heading_replacements
 
 
1306
  for next_para_offset in range(1, 6):
1307
  next_para_idx = para_idx + next_para_offset
1308
  if next_para_idx >= len(paragraphs):
1309
  break
 
1310
  next_paragraph = paragraphs[next_para_idx]
1311
  next_text = next_paragraph.text.strip()
 
1312
  if not next_text:
1313
  continue
 
 
1314
  is_another_heading = False
1315
  for category, patterns in HEADING_PATTERNS.items():
1316
  for pattern in patterns:
@@ -1319,18 +1418,23 @@ def process_headings(document, flat_json):
1319
  break
1320
  if is_another_heading:
1321
  break
 
1322
  if is_another_heading:
1323
  break
 
1324
  if has_red_text_in_paragraph(next_paragraph):
1325
  print(f" πŸ”΄ Found red text in paragraph {next_para_idx + 1} after heading")
1326
- context_replacements = process_red_text_in_paragraph(
1327
  next_paragraph,
1328
  paragraph_text,
1329
- flat_json
 
1330
  )
1331
  replacements_made += context_replacements
 
1332
  return replacements_made
1333
 
 
1334
  def process_red_text_in_paragraph(paragraph, context_text, flat_json):
1335
  replacements_made = 0
1336
  red_text_segments = []
 
570
  return replacements_made
571
 
572
  def fix_management_summary_details_column(table, flat_json):
573
+ """Enhanced management summary processing with better data matching"""
574
  replacements_made = 0
575
  print(f" 🎯 FIX: Management Summary DETAILS column processing")
576
+
577
+ # Determine which type of management summary this is
578
  table_text = ""
579
  for row in table.rows[:3]:
580
  for cell in row.cells:
581
  table_text += get_clean_text(cell).lower() + " "
582
+
583
  mgmt_types = []
584
  if "mass management" in table_text or "mass" in table_text:
585
  mgmt_types.append("Mass Management Summary")
 
587
  mgmt_types.append("Maintenance Management Summary")
588
  if "fatigue management" in table_text or "fatigue" in table_text:
589
  mgmt_types.append("Fatigue Management Summary")
590
+
591
+ # Fallback detection
592
  if not mgmt_types:
593
  if any("std 5" in get_clean_text(c).lower() for r in table.rows for c in r.cells):
594
  mgmt_types.append("Mass Management Summary")
595
+
596
  if not mgmt_types:
597
+ print(f" ⚠️ Could not determine management summary type")
598
  return 0
599
+
600
  for mgmt_type in mgmt_types:
601
  print(f" βœ… Confirmed {mgmt_type} table processing")
602
+
603
+ # Look for management data in the JSON
604
+ mgmt_data = None
605
+
606
+ # Try direct key match first
607
+ if mgmt_type in flat_json:
608
+ mgmt_data = flat_json[mgmt_type]
609
+
610
+ # Try variations of the key
611
+ if not mgmt_data:
612
  for key in flat_json.keys():
613
+ key_lower = key.lower()
614
+ mgmt_lower = mgmt_type.lower()
615
+ if mgmt_lower in key_lower or key_lower in mgmt_lower:
616
+ mgmt_data = flat_json[key]
617
+ print(f" βœ… Found data using key variation: '{key}'")
618
  break
619
+
620
+ # If still no data, look for individual standard data
621
+ if not mgmt_data:
622
+ # Collect individual standard entries
623
+ mgmt_data = {}
624
+ for key, value in flat_json.items():
625
+ key_lower = key.lower()
626
+ # Look for standard entries related to this management type
627
+ if ("std " in key_lower and
628
+ (("mass" in mgmt_type.lower() and any(term in key_lower for term in ["verification", "internal review"])) or
629
+ ("maintenance" in mgmt_type.lower() and any(term in key_lower for term in ["daily check", "internal review"])) or
630
+ ("fatigue" in mgmt_type.lower() and any(term in key_lower for term in ["internal review"])))):
631
+ mgmt_data[key] = value
632
+
633
+ if mgmt_data:
634
+ print(f" βœ… Collected individual standard data: {list(mgmt_data.keys())}")
635
+
636
+ if not mgmt_data or not isinstance(mgmt_data, dict):
637
  print(f" ⚠️ No JSON management dict found for {mgmt_type}, skipping this type")
638
  continue
639
+
640
+ # Process the table rows
641
  for row_idx, row in enumerate(table.rows):
642
  if len(row.cells) >= 2:
643
  standard_cell = row.cells[0]
644
  details_cell = row.cells[1]
645
  standard_text = get_clean_text(standard_cell).strip().lower()
646
+
647
+ # Skip header rows
648
+ if "standard" in standard_text or "requirement" in standard_text or "details" in standard_text:
649
+ continue
650
+
651
+ # Look for specific standards
652
  if "std 5" in standard_text or "verification" in standard_text:
653
  if has_red_text(details_cell):
654
+ std_val = find_best_standard_value(mgmt_data, ["Std 5. Verification", "Std 5 Verification", "Std 5", "Verification"])
655
+ if std_val:
 
 
 
 
 
 
 
 
 
656
  replacement_text = get_value_as_string(std_val, "Std 5. Verification")
657
  cell_replacements = replace_red_text_in_cell(details_cell, replacement_text)
658
  replacements_made += cell_replacements
659
  if cell_replacements:
660
  print(f" βœ… Replaced Std 5. Verification details for {mgmt_type}")
661
+
662
+ elif "std 6" in standard_text or "internal review" in standard_text:
663
  if has_red_text(details_cell):
664
+ std_val = find_best_standard_value(mgmt_data, ["Std 6. Internal Review", "Std 6 Internal Review", "Std 6", "Internal Review"])
665
+ if std_val:
 
 
 
 
 
 
 
 
 
666
  replacement_text = get_value_as_string(std_val, "Std 6. Internal Review")
667
  cell_replacements = replace_red_text_in_cell(details_cell, replacement_text)
668
  replacements_made += cell_replacements
669
  if cell_replacements:
670
  print(f" βœ… Replaced Std 6. Internal Review details for {mgmt_type}")
671
+
672
+ elif "std 1" in standard_text or "daily check" in standard_text:
673
+ if has_red_text(details_cell):
674
+ std_val = find_best_standard_value(mgmt_data, ["Std 1. Daily Check", "Std 1 Daily Check", "Std 1", "Daily Check"])
675
+ if std_val:
676
+ replacement_text = get_value_as_string(std_val, "Std 1. Daily Check")
677
+ cell_replacements = replace_red_text_in_cell(details_cell, replacement_text)
678
+ replacements_made += cell_replacements
679
+ if cell_replacements:
680
+ print(f" βœ… Replaced Std 1. Daily Check details for {mgmt_type}")
681
+
682
+ elif "std 7" in standard_text:
683
+ if has_red_text(details_cell):
684
+ std_val = find_best_standard_value(mgmt_data, ["Std 7. Internal Review", "Std 7 Internal Review", "Std 7"])
685
+ if std_val:
686
+ replacement_text = get_value_as_string(std_val, "Std 7. Internal Review")
687
+ cell_replacements = replace_red_text_in_cell(details_cell, replacement_text)
688
+ replacements_made += cell_replacements
689
+ if cell_replacements:
690
+ print(f" βœ… Replaced Std 7. Internal Review details for {mgmt_type}")
691
+
692
  return replacements_made
693
 
694
+
695
def find_best_standard_value(mgmt_data, candidate_keys):
    """Return the value in *mgmt_data* that best matches one of *candidate_keys*.

    Lookup happens in two passes:

    1. Exact match: the first candidate (in the order given) that is a key
       of ``mgmt_data`` wins.
    2. Fuzzy match: keys are scanned in dict order and the first key that
       contains a candidate, or is contained in one, wins. The comparison
       is case-insensitive and ignores surrounding whitespace.

    Args:
        mgmt_data: Mapping of standard names to their values.
        candidate_keys: Acceptable key spellings, most specific first.

    Returns:
        The matched value, or ``None`` when nothing matches or
        ``mgmt_data`` is empty.
    """
    if not mgmt_data:
        return None

    # Pass 1: exact key match, honouring the caller's candidate priority.
    for candidate in candidate_keys:
        if candidate in mgmt_data:
            return mgmt_data[candidate]

    # Lower-case the candidates once. Blank candidates are dropped because
    # "" is a substring of every string and would match any key.
    lowered_candidates = [
        candidate.strip().lower()
        for candidate in candidate_keys
        if candidate and candidate.strip()
    ]

    # Pass 2: substring match in either direction.
    for key, value in mgmt_data.items():
        key_lower = str(key).strip().lower()
        if not key_lower:
            # A blank key is "contained" in every candidate; matching on it
            # would return an unrelated value.
            continue
        for candidate_lower in lowered_candidates:
            if candidate_lower in key_lower or key_lower in candidate_lower:
                return value

    return None
708
+
709
  # ============================================================================
710
  # Canonical operator declaration fixer — SAFER
711
  # ============================================================================
 
1345
  return replacements_made
1346
 
1347
  def process_headings(document, flat_json):
1348
+ """
1349
+ IMPROVED: Better heading processing that avoids mixing company data
1350
+ """
1351
  replacements_made = 0
1352
  print(f"\nπŸ” Processing headings:")
1353
  paragraphs = document.paragraphs
1354
+
1355
+ # Extract the correct operator name from the JSON data
1356
+ operator_name = None
1357
+ for key, value in flat_json.items():
1358
+ if "operator name" in key.lower() and "legal entity" in key.lower():
1359
+ if isinstance(value, list) and value:
1360
+ operator_name = str(value[0]).strip()
1361
+ else:
1362
+ operator_name = str(value).strip()
1363
+ break
1364
+
1365
+ if not operator_name:
1366
+ # Fallback - try other operator name keys
1367
+ for key, value in flat_json.items():
1368
+ if ("operator" in key.lower() and "name" in key.lower()) or key.lower() == "operator name":
1369
+ if isinstance(value, list) and value:
1370
+ operator_name = str(value[0]).strip()
1371
+ elif value:
1372
+ operator_name = str(value).strip()
1373
+ break
1374
+
1375
+ print(f" πŸ“‹ Using operator name: '{operator_name}'")
1376
+
1377
  for para_idx, paragraph in enumerate(paragraphs):
1378
  paragraph_text = paragraph.text.strip()
1379
  if not paragraph_text:
1380
  continue
1381
+
1382
  matched_heading = None
1383
  for category, patterns in HEADING_PATTERNS.items():
1384
  for pattern in patterns:
 
1387
  break
1388
  if matched_heading:
1389
  break
1390
+
1391
  if matched_heading:
1392
  print(f" πŸ“Œ Found heading at paragraph {para_idx + 1}: '{paragraph_text}'")
1393
+
1394
+ # Check if the heading itself has red text
1395
  if has_red_text_in_paragraph(paragraph):
1396
  print(f" πŸ”΄ Found red text in heading itself")
1397
+ heading_replacements = process_red_text_in_heading_paragraph(paragraph, paragraph_text, flat_json, operator_name)
1398
  replacements_made += heading_replacements
1399
+
1400
+ # Look for red text in paragraphs immediately following this heading
1401
  for next_para_offset in range(1, 6):
1402
  next_para_idx = para_idx + next_para_offset
1403
  if next_para_idx >= len(paragraphs):
1404
  break
1405
+
1406
  next_paragraph = paragraphs[next_para_idx]
1407
  next_text = next_paragraph.text.strip()
1408
+
1409
  if not next_text:
1410
  continue
1411
+
1412
+ # Stop if we hit another heading
1413
  is_another_heading = False
1414
  for category, patterns in HEADING_PATTERNS.items():
1415
  for pattern in patterns:
 
1418
  break
1419
  if is_another_heading:
1420
  break
1421
+
1422
  if is_another_heading:
1423
  break
1424
+
1425
  if has_red_text_in_paragraph(next_paragraph):
1426
  print(f" πŸ”΄ Found red text in paragraph {next_para_idx + 1} after heading")
1427
+ context_replacements = process_red_text_in_context_paragraph(
1428
  next_paragraph,
1429
  paragraph_text,
1430
+ flat_json,
1431
+ operator_name
1432
  )
1433
  replacements_made += context_replacements
1434
+
1435
  return replacements_made
1436
 
1437
+
1438
  def process_red_text_in_paragraph(paragraph, context_text, flat_json):
1439
  replacements_made = 0
1440
  red_text_segments = []