Spaces:
Running
Running
Update master_key.py
Browse files - master_key.py +23 -9
master_key.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""
|
| 2 |
Improved Master Key for NHVAS Audit extraction:
|
| 3 |
-
- TABLE_SCHEMAS: Enhanced definitions with better matching criteria
|
| 4 |
- HEADING_PATTERNS: Improved regex patterns for main/sub headings
|
| 5 |
- PARAGRAPH_PATTERNS: Enhanced patterns for key narrative sections
|
| 6 |
"""
|
|
@@ -99,6 +99,8 @@ TABLE_SCHEMAS = {
|
|
| 99 |
"labels": ["Title", "Abbreviation", "Description"],
|
| 100 |
"priority": 80
|
| 101 |
},
|
|
|
|
|
|
|
| 102 |
"Maintenance Management": {
|
| 103 |
"headings": [
|
| 104 |
{"level": 1, "text": "NHVAS AUDIT SUMMARY REPORT"}
|
|
@@ -115,7 +117,8 @@ TABLE_SCHEMAS = {
|
|
| 115 |
"Std 8. Training and Education"
|
| 116 |
],
|
| 117 |
"priority": 60,
|
| 118 |
-
"context_keywords": ["maintenance"]
|
|
|
|
| 119 |
},
|
| 120 |
"Mass Management": {
|
| 121 |
"headings": [
|
|
@@ -133,7 +136,8 @@ TABLE_SCHEMAS = {
|
|
| 133 |
"Std 8. Maintenance of Suspension"
|
| 134 |
],
|
| 135 |
"priority": 60,
|
| 136 |
-
"context_keywords": ["mass"]
|
|
|
|
| 137 |
},
|
| 138 |
"Fatigue Management": {
|
| 139 |
"headings": [
|
|
@@ -150,8 +154,11 @@ TABLE_SCHEMAS = {
|
|
| 150 |
"Std 7. Workplace conditions"
|
| 151 |
],
|
| 152 |
"priority": 60,
|
| 153 |
-
"context_keywords": ["fatigue"]
|
|
|
|
| 154 |
},
|
|
|
|
|
|
|
| 155 |
"Maintenance Management Summary": {
|
| 156 |
"headings": [
|
| 157 |
{"level": 1, "text": "Audit Observations and Comments"},
|
|
@@ -169,7 +176,8 @@ TABLE_SCHEMAS = {
|
|
| 169 |
"Std 7. Internal Review",
|
| 170 |
"Std 8. Training and Education"
|
| 171 |
],
|
| 172 |
-
"priority":
|
|
|
|
| 173 |
},
|
| 174 |
"Mass Management Summary": {
|
| 175 |
"headings": [
|
|
@@ -179,15 +187,16 @@ TABLE_SCHEMAS = {
|
|
| 179 |
"columns": ["MASS MANAGEMENT", "DETAILS"],
|
| 180 |
"labels": [
|
| 181 |
"Std 1. Responsibilities",
|
| 182 |
-
"Std 2. Vehicle Control",
|
| 183 |
"Std 3. Vehicle Use",
|
| 184 |
-
"Std 4. Records and Documentation",
|
| 185 |
"Std 5. Verification",
|
| 186 |
"Std 6. Internal Review",
|
| 187 |
"Std 7. Training and Education",
|
| 188 |
"Std 8. Maintenance of Suspension"
|
| 189 |
],
|
| 190 |
-
"priority":
|
|
|
|
| 191 |
},
|
| 192 |
"Fatigue Management Summary": {
|
| 193 |
"headings": [
|
|
@@ -204,8 +213,11 @@ TABLE_SCHEMAS = {
|
|
| 204 |
"Std 6. Records and Documentation",
|
| 205 |
"Std 7. Workplace conditions"
|
| 206 |
],
|
| 207 |
-
"priority":
|
|
|
|
| 208 |
},
|
|
|
|
|
|
|
| 209 |
"Vehicle Registration Numbers Maintenance": {
|
| 210 |
"headings": [
|
| 211 |
{"level": 1, "text": "Vehicle Registration Numbers of Records Examined"},
|
|
@@ -256,6 +268,8 @@ TABLE_SCHEMAS = {
|
|
| 256 |
"priority": 80,
|
| 257 |
"context_keywords": ["driver", "scheduler", "fatigue"]
|
| 258 |
},
|
|
|
|
|
|
|
| 259 |
"Operator's Name (legal entity)": {
|
| 260 |
"headings": [
|
| 261 |
{"level": 1, "text": "CORRECTIVE ACTION REQUEST (CAR)"}
|
|
|
|
| 1 |
"""
|
| 2 |
Improved Master Key for NHVAS Audit extraction:
|
| 3 |
+
- TABLE_SCHEMAS: Enhanced definitions with better matching criteria for Summary vs Basic tables
|
| 4 |
- HEADING_PATTERNS: Improved regex patterns for main/sub headings
|
| 5 |
- PARAGRAPH_PATTERNS: Enhanced patterns for key narrative sections
|
| 6 |
"""
|
|
|
|
| 99 |
"labels": ["Title", "Abbreviation", "Description"],
|
| 100 |
"priority": 80
|
| 101 |
},
|
| 102 |
+
|
| 103 |
+
# 🎯 BASIC MANAGEMENT SCHEMAS (Compliance Tables - Lower Priority)
|
| 104 |
"Maintenance Management": {
|
| 105 |
"headings": [
|
| 106 |
{"level": 1, "text": "NHVAS AUDIT SUMMARY REPORT"}
|
|
|
|
| 117 |
"Std 8. Training and Education"
|
| 118 |
],
|
| 119 |
"priority": 60,
|
| 120 |
+
"context_keywords": ["maintenance"],
|
| 121 |
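+
"context_exclusions": ["summary", "details", "audit findings"] # Exclude Summary tables. NOTE(review): this schema's own heading is "NHVAS AUDIT SUMMARY REPORT"; confirm the "summary" exclusion is never matched against heading text.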
|
| 122 |
},
|
| 123 |
"Mass Management": {
|
| 124 |
"headings": [
|
|
|
|
| 136 |
"Std 8. Maintenance of Suspension"
|
| 137 |
],
|
| 138 |
"priority": 60,
|
| 139 |
+
"context_keywords": ["mass"],
|
| 140 |
+
"context_exclusions": ["summary", "details", "audit findings"] # Exclude Summary tables
|
| 141 |
},
|
| 142 |
"Fatigue Management": {
|
| 143 |
"headings": [
|
|
|
|
| 154 |
"Std 7. Workplace conditions"
|
| 155 |
],
|
| 156 |
"priority": 60,
|
| 157 |
+
"context_keywords": ["fatigue"],
|
| 158 |
+
"context_exclusions": ["summary", "details", "audit findings"] # Exclude Summary tables
|
| 159 |
},
|
| 160 |
+
|
| 161 |
+
# 🎯 SUMMARY MANAGEMENT SCHEMAS (Detailed Tables with DETAILS column - Higher Priority)
|
| 162 |
"Maintenance Management Summary": {
|
| 163 |
"headings": [
|
| 164 |
{"level": 1, "text": "Audit Observations and Comments"},
|
|
|
|
| 176 |
"Std 7. Internal Review",
|
| 177 |
"Std 8. Training and Education"
|
| 178 |
],
|
| 179 |
+
"priority": 85, # Higher priority than basic Maintenance Management
|
| 180 |
+
"context_keywords": ["maintenance", "summary", "details", "audit findings"]
|
| 181 |
},
|
| 182 |
"Mass Management Summary": {
|
| 183 |
"headings": [
|
|
|
|
| 187 |
"columns": ["MASS MANAGEMENT", "DETAILS"],
|
| 188 |
"labels": [
|
| 189 |
"Std 1. Responsibilities",
|
| 190 |
+
"Std 2. Vehicle Control",
|
| 191 |
"Std 3. Vehicle Use",
|
| 192 |
+
"Std 4. Records and Documentation",
|
| 193 |
"Std 5. Verification",
|
| 194 |
"Std 6. Internal Review",
|
| 195 |
"Std 7. Training and Education",
|
| 196 |
"Std 8. Maintenance of Suspension"
|
| 197 |
],
|
| 198 |
+
"priority": 85, # Higher priority than basic Mass Management
|
| 199 |
+
"context_keywords": ["mass", "summary", "details", "audit findings"]
|
| 200 |
},
|
| 201 |
"Fatigue Management Summary": {
|
| 202 |
"headings": [
|
|
|
|
| 213 |
"Std 6. Records and Documentation",
|
| 214 |
"Std 7. Workplace conditions"
|
| 215 |
],
|
| 216 |
+
"priority": 85, # Higher priority than basic Fatigue Management
|
| 217 |
+
"context_keywords": ["fatigue", "summary", "details", "audit findings"]
|
| 218 |
},
|
| 219 |
+
|
| 220 |
+
# Vehicle Registration Tables
|
| 221 |
"Vehicle Registration Numbers Maintenance": {
|
| 222 |
"headings": [
|
| 223 |
{"level": 1, "text": "Vehicle Registration Numbers of Records Examined"},
|
|
|
|
| 268 |
"priority": 80,
|
| 269 |
"context_keywords": ["driver", "scheduler", "fatigue"]
|
| 270 |
},
|
| 271 |
+
|
| 272 |
+
# Other Tables
|
| 273 |
"Operator's Name (legal entity)": {
|
| 274 |
"headings": [
|
| 275 |
{"level": 1, "text": "CORRECTIVE ACTION REQUEST (CAR)"}
|