Spaces:
Running
Running
make Pydantic data validation more permissive
Browse files- api/models.py +40 -18
- core/text_parser.py +41 -7
api/models.py
CHANGED
|
@@ -121,39 +121,61 @@ class HBVPatientInput(BaseModel):
|
|
| 121 |
|
| 122 |
@validator('sex')
|
| 123 |
def validate_sex(cls, v):
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
@validator('pregnancy_status')
|
| 129 |
def validate_pregnancy(cls, v):
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
@validator('hbsag_status')
|
| 135 |
def validate_hbsag(cls, v):
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
@validator('hbeag_status')
|
| 141 |
def validate_hbeag(cls, v):
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
@validator('fibrosis_stage')
|
| 147 |
def validate_fibrosis(cls, v):
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
@validator('necroinflammatory_activity')
|
| 153 |
def validate_necroinflammatory(cls, v):
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
|
| 159 |
class HBVAssessmentResponse(BaseModel):
|
|
|
|
| 121 |
|
| 122 |
@validator('sex')
|
| 123 |
def validate_sex(cls, v):
    """Normalize a free-form sex value to ``'Male'`` or ``'Female'``.

    The value is converted with ``str()``, stripped of surrounding
    whitespace and lower-cased before being compared against the accepted
    spellings, so matching is case-insensitive.

    Returns:
        The canonical string ``'Male'`` or ``'Female'``.

    Raises:
        ValueError: If the normalized value is not an accepted spelling.
    """
    # Canonical label -> every accepted (already lower-cased) spelling.
    accepted = {
        'Male': ('male', 'm', 'man', 'boy'),
        'Female': ('female', 'f', 'woman', 'girl'),
    }
    token = str(v).strip().lower()
    for canonical, spellings in accepted.items():
        if token in spellings:
            return canonical
    raise ValueError('Sex must be either Male or Female')
|
| 130 |
|
| 131 |
@validator('pregnancy_status')
|
| 132 |
def validate_pregnancy(cls, v):
    """Normalize a pregnancy status to ``'Pregnant'`` or ``'Not pregnant'``.

    The value is converted with ``str()``, stripped and lower-cased, then
    looked up among the accepted spellings. Note that any value whose
    string form is ``'none'`` (case-insensitively) is treated as
    ``'Not pregnant'``.

    Returns:
        The canonical string ``'Pregnant'`` or ``'Not pregnant'``.

    Raises:
        ValueError: If the normalized value is not an accepted spelling.
    """
    # Flat alias table: accepted lower-cased spelling -> canonical label.
    aliases = {
        'pregnant': 'Pregnant',
        'yes': 'Pregnant',
        'y': 'Pregnant',
        'not pregnant': 'Not pregnant',
        'non-pregnant': 'Not pregnant',
        'non pregnant': 'Not pregnant',
        'no': 'Not pregnant',
        'n': 'Not pregnant',
        'none': 'Not pregnant',
    }
    canonical = aliases.get(str(v).strip().lower())
    if canonical is None:
        raise ValueError('Pregnancy status must be either "Not pregnant" or "Pregnant"')
    return canonical
|
| 139 |
|
| 140 |
@validator('hbsag_status')
|
| 141 |
def validate_hbsag(cls, v):
    """Normalize an HBsAg result to ``'Positive'`` or ``'Negative'``.

    The value is converted with ``str()``, stripped and lower-cased before
    it is compared against the accepted spellings, which include the
    ``+``/``-`` symbols and "reactive"/"non-reactive" wording.

    Returns:
        The canonical string ``'Positive'`` or ``'Negative'``.

    Raises:
        ValueError: If the normalized value is not an accepted spelling.
    """
    positive_spellings = ('positive', 'pos', '+', 'reactive')
    negative_spellings = ('negative', 'neg', '-', 'non-reactive', 'nonreactive')

    token = str(v).strip().lower()
    if token in positive_spellings:
        return 'Positive'
    if token in negative_spellings:
        return 'Negative'
    raise ValueError('HBsAg status must be either Positive or Negative')
|
| 148 |
|
| 149 |
@validator('hbeag_status')
|
| 150 |
def validate_hbeag(cls, v):
    """Normalize an HBeAg result to ``'Positive'`` or ``'Negative'``.

    The value is converted with ``str()``, stripped and lower-cased before
    it is compared against the accepted spellings, which include the
    ``+``/``-`` symbols and "reactive"/"non-reactive" wording.

    Returns:
        The canonical string ``'Positive'`` or ``'Negative'``.

    Raises:
        ValueError: If the normalized value is not an accepted spelling.
    """
    # (canonical label, accepted lower-cased spellings), checked in order.
    outcomes = (
        ('Positive', ('positive', 'pos', '+', 'reactive')),
        ('Negative', ('negative', 'neg', '-', 'non-reactive', 'nonreactive')),
    )
    token = str(v).strip().lower()
    for label, spellings in outcomes:
        if token in spellings:
            return label
    raise ValueError('HBeAg status must be either Positive or Negative')
|
| 157 |
|
| 158 |
@validator('fibrosis_stage')
|
| 159 |
def validate_fibrosis(cls, v):
    """Normalize a fibrosis stage to ``'F0-F1'``, ``'F2-F3'`` or ``'F4'``.

    The value is converted with ``str()``, stripped, upper-cased and has
    its space characters removed, so inputs such as ``'f0 - f1'`` are
    accepted. Individual stages ``F0``..``F3`` are mapped onto the grouped
    category that contains them.

    Returns:
        One of the canonical strings ``'F0-F1'``, ``'F2-F3'`` or ``'F4'``.

    Raises:
        ValueError: If the normalized value is not a recognized stage.
    """
    # Every accepted normalized spelling -> canonical category. 'F4' appears
    # exactly once here; the previous implementation re-tested it in a
    # second branch that could never be reached.
    stage_to_category = {
        'F0-F1': 'F0-F1',
        'F2-F3': 'F2-F3',
        'F4': 'F4',
        'F0': 'F0-F1',
        'F1': 'F0-F1',
        'F2': 'F2-F3',
        'F3': 'F2-F3',
    }
    value = str(v).strip().upper().replace(" ", "")
    category = stage_to_category.get(value)
    if category is None:
        raise ValueError('Fibrosis stage must be F0-F1, F2-F3, or F4 (or F0, F1, F2, F3, F4 which will be mapped to these categories)')
    return category
|
| 170 |
|
| 171 |
@validator('necroinflammatory_activity')
|
| 172 |
def validate_necroinflammatory(cls, v):
    """Normalize a necroinflammatory activity grade to ``'A0'``..``'A3'``.

    The value is converted with ``str()``, stripped, upper-cased and has
    its space characters removed. A bare grade digit (``0``-``3``, as a
    string or anything whose string form is that digit) is accepted and
    prefixed with ``'A'``.

    Returns:
        One of the canonical strings ``'A0'``, ``'A1'``, ``'A2'``, ``'A3'``.

    Raises:
        ValueError: If the normalized value is not a recognized grade.
    """
    grade = str(v).strip().upper().replace(" ", "")
    # Promote a bare digit to its prefixed form first, so a single
    # membership test below covers both accepted notations.
    if grade in ('0', '1', '2', '3'):
        grade = 'A' + grade
    if grade in ('A0', 'A1', 'A2', 'A3'):
        return grade
    raise ValueError('Necroinflammatory activity must be A0, A1, A2, or A3')
|
| 179 |
|
| 180 |
|
| 181 |
class HBVAssessmentResponse(BaseModel):
|
core/text_parser.py
CHANGED
|
@@ -172,23 +172,57 @@ def validate_extracted_data(data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 172 |
except (ValueError, TypeError) as e:
|
| 173 |
raise ValueError(f"Invalid data type in extracted data: {str(e)}")
|
| 174 |
|
| 175 |
-
# Validate enum values
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
raise ValueError(f"Invalid sex value: {data['sex']}")
|
| 178 |
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
raise ValueError(f"Invalid pregnancy_status value: {data['pregnancy_status']}")
|
| 181 |
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
raise ValueError(f"Invalid hbsag_status value: {data['hbsag_status']}")
|
| 184 |
|
| 185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
raise ValueError(f"Invalid hbeag_status value: {data['hbeag_status']}")
|
| 187 |
|
| 188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
raise ValueError(f"Invalid fibrosis_stage value: {data['fibrosis_stage']}")
|
| 190 |
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
raise ValueError(f"Invalid necroinflammatory_activity value: {data['necroinflammatory_activity']}")
|
| 193 |
|
| 194 |
return data
|
|
|
|
| 172 |
except (ValueError, TypeError) as e:
|
| 173 |
raise ValueError(f"Invalid data type in extracted data: {str(e)}")
|
| 174 |
|
| 175 |
+
# Validate and normalize enum/string values to canonical forms
|
| 176 |
+
sex_raw = str(data['sex']).strip().lower()
|
| 177 |
+
if sex_raw in ['male', 'm', 'man', 'boy']:
|
| 178 |
+
data['sex'] = 'Male'
|
| 179 |
+
elif sex_raw in ['female', 'f', 'woman', 'girl']:
|
| 180 |
+
data['sex'] = 'Female'
|
| 181 |
+
else:
|
| 182 |
raise ValueError(f"Invalid sex value: {data['sex']}")
|
| 183 |
|
| 184 |
+
preg_raw = str(data['pregnancy_status']).strip().lower()
|
| 185 |
+
if preg_raw in ['pregnant', 'yes', 'y']:
|
| 186 |
+
data['pregnancy_status'] = 'Pregnant'
|
| 187 |
+
elif preg_raw in ['not pregnant', 'non-pregnant', 'non pregnant', 'no', 'n', 'none']:
|
| 188 |
+
data['pregnancy_status'] = 'Not pregnant'
|
| 189 |
+
else:
|
| 190 |
raise ValueError(f"Invalid pregnancy_status value: {data['pregnancy_status']}")
|
| 191 |
|
| 192 |
+
hbsag_raw = str(data['hbsag_status']).strip().lower()
|
| 193 |
+
if hbsag_raw in ['positive', 'pos', '+', 'reactive']:
|
| 194 |
+
data['hbsag_status'] = 'Positive'
|
| 195 |
+
elif hbsag_raw in ['negative', 'neg', '-', 'non-reactive', 'nonreactive']:
|
| 196 |
+
data['hbsag_status'] = 'Negative'
|
| 197 |
+
else:
|
| 198 |
raise ValueError(f"Invalid hbsag_status value: {data['hbsag_status']}")
|
| 199 |
|
| 200 |
+
hbeag_raw = str(data['hbeag_status']).strip().lower()
|
| 201 |
+
if hbeag_raw in ['positive', 'pos', '+', 'reactive']:
|
| 202 |
+
data['hbeag_status'] = 'Positive'
|
| 203 |
+
elif hbeag_raw in ['negative', 'neg', '-', 'non-reactive', 'nonreactive']:
|
| 204 |
+
data['hbeag_status'] = 'Negative'
|
| 205 |
+
else:
|
| 206 |
raise ValueError(f"Invalid hbeag_status value: {data['hbeag_status']}")
|
| 207 |
|
| 208 |
+
fibrosis_raw = str(data['fibrosis_stage']).strip().upper().replace(" ", "")
|
| 209 |
+
if fibrosis_raw in ['F0-F1', 'F2-F3', 'F4']:
|
| 210 |
+
data['fibrosis_stage'] = fibrosis_raw
|
| 211 |
+
elif fibrosis_raw in ['F0', 'F1']:
|
| 212 |
+
data['fibrosis_stage'] = 'F0-F1'
|
| 213 |
+
elif fibrosis_raw in ['F2', 'F3']:
|
| 214 |
+
data['fibrosis_stage'] = 'F2-F3'
|
| 215 |
+
elif fibrosis_raw == 'F4':
|
| 216 |
+
data['fibrosis_stage'] = 'F4'
|
| 217 |
+
else:
|
| 218 |
raise ValueError(f"Invalid fibrosis_stage value: {data['fibrosis_stage']}")
|
| 219 |
|
| 220 |
+
necro_raw = str(data['necroinflammatory_activity']).strip().upper().replace(" ", "")
|
| 221 |
+
if necro_raw in ['A0', 'A1', 'A2', 'A3']:
|
| 222 |
+
data['necroinflammatory_activity'] = necro_raw
|
| 223 |
+
elif necro_raw in ['0', '1', '2', '3']:
|
| 224 |
+
data['necroinflammatory_activity'] = 'A' + necro_raw
|
| 225 |
+
else:
|
| 226 |
raise ValueError(f"Invalid necroinflammatory_activity value: {data['necroinflammatory_activity']}")
|
| 227 |
|
| 228 |
return data
|