moazx committed on
Commit
23806d1
·
1 Parent(s): 8e013e3

make Pydantic data validation more permissive

Browse files
Files changed (2) hide show
  1. api/models.py +40 -18
  2. core/text_parser.py +41 -7
api/models.py CHANGED
@@ -121,39 +121,61 @@ class HBVPatientInput(BaseModel):
121
 
122
  @validator('sex')
123
  def validate_sex(cls, v):
124
- if v not in ['Male', 'Female']:
125
- raise ValueError('Sex must be either Male or Female')
126
- return v
 
 
 
127
 
128
  @validator('pregnancy_status')
129
  def validate_pregnancy(cls, v):
130
- if v not in ['Not pregnant', 'Pregnant']:
131
- raise ValueError('Pregnancy status must be either "Not pregnant" or "Pregnant"')
132
- return v
 
 
 
133
 
134
  @validator('hbsag_status')
135
  def validate_hbsag(cls, v):
136
- if v not in ['Positive', 'Negative']:
137
- raise ValueError('HBsAg status must be either Positive or Negative')
138
- return v
 
 
 
139
 
140
  @validator('hbeag_status')
141
  def validate_hbeag(cls, v):
142
- if v not in ['Positive', 'Negative']:
143
- raise ValueError('HBeAg status must be either Positive or Negative')
144
- return v
 
 
 
145
 
146
  @validator('fibrosis_stage')
147
  def validate_fibrosis(cls, v):
148
- if v not in ['F0-F1', 'F2-F3', 'F4']:
149
- raise ValueError('Fibrosis stage must be F0-F1, F2-F3, or F4')
150
- return v
 
 
 
 
 
 
 
151
 
152
  @validator('necroinflammatory_activity')
153
  def validate_necroinflammatory(cls, v):
154
- if v not in ['A0', 'A1', 'A2', 'A3']:
155
- raise ValueError('Necroinflammatory activity must be A0, A1, A2, or A3')
156
- return v
 
 
 
157
 
158
 
159
  class HBVAssessmentResponse(BaseModel):
 
121
 
122
  @validator('sex')
123
  def validate_sex(cls, v):
124
+ value = str(v).strip().lower()
125
+ if value in ['male', 'm', 'man', 'boy']:
126
+ return 'Male'
127
+ if value in ['female', 'f', 'woman', 'girl']:
128
+ return 'Female'
129
+ raise ValueError('Sex must be either Male or Female')
130
 
131
  @validator('pregnancy_status')
132
  def validate_pregnancy(cls, v):
133
+ value = str(v).strip().lower()
134
+ if value in ['pregnant', 'yes', 'y']:
135
+ return 'Pregnant'
136
+ if value in ['not pregnant', 'non-pregnant', 'non pregnant', 'no', 'n', 'none']:
137
+ return 'Not pregnant'
138
+ raise ValueError('Pregnancy status must be either "Not pregnant" or "Pregnant"')
139
 
140
  @validator('hbsag_status')
141
  def validate_hbsag(cls, v):
142
+ value = str(v).strip().lower()
143
+ if value in ['positive', 'pos', '+', 'reactive']:
144
+ return 'Positive'
145
+ if value in ['negative', 'neg', '-', 'non-reactive', 'nonreactive']:
146
+ return 'Negative'
147
+ raise ValueError('HBsAg status must be either Positive or Negative')
148
 
149
  @validator('hbeag_status')
150
  def validate_hbeag(cls, v):
151
+ value = str(v).strip().lower()
152
+ if value in ['positive', 'pos', '+', 'reactive']:
153
+ return 'Positive'
154
+ if value in ['negative', 'neg', '-', 'non-reactive', 'nonreactive']:
155
+ return 'Negative'
156
+ raise ValueError('HBeAg status must be either Positive or Negative')
157
 
158
  @validator('fibrosis_stage')
159
  def validate_fibrosis(cls, v):
160
+ value = str(v).strip().upper().replace(" ", "")
161
+ if value in ['F0-F1', 'F2-F3', 'F4']:
162
+ return value
163
+ if value in ['F0', 'F1']:
164
+ return 'F0-F1'
165
+ if value in ['F2', 'F3']:
166
+ return 'F2-F3'
167
+ if value == 'F4':
168
+ return 'F4'
169
+ raise ValueError('Fibrosis stage must be F0-F1, F2-F3, or F4 (or F0, F1, F2, F3, F4 which will be mapped to these categories)')
170
 
171
  @validator('necroinflammatory_activity')
172
  def validate_necroinflammatory(cls, v):
173
+ value = str(v).strip().upper().replace(" ", "")
174
+ if value in ['A0', 'A1', 'A2', 'A3']:
175
+ return value
176
+ if value in ['0', '1', '2', '3']:
177
+ return 'A' + value
178
+ raise ValueError('Necroinflammatory activity must be A0, A1, A2, or A3')
179
 
180
 
181
  class HBVAssessmentResponse(BaseModel):
core/text_parser.py CHANGED
@@ -172,23 +172,57 @@ def validate_extracted_data(data: Dict[str, Any]) -> Dict[str, Any]:
172
  except (ValueError, TypeError) as e:
173
  raise ValueError(f"Invalid data type in extracted data: {str(e)}")
174
 
175
- # Validate enum values
176
- if data['sex'] not in ['Male', 'Female']:
 
 
 
 
 
177
  raise ValueError(f"Invalid sex value: {data['sex']}")
178
 
179
- if data['pregnancy_status'] not in ['Not pregnant', 'Pregnant']:
 
 
 
 
 
180
  raise ValueError(f"Invalid pregnancy_status value: {data['pregnancy_status']}")
181
 
182
- if data['hbsag_status'] not in ['Positive', 'Negative']:
 
 
 
 
 
183
  raise ValueError(f"Invalid hbsag_status value: {data['hbsag_status']}")
184
 
185
- if data['hbeag_status'] not in ['Positive', 'Negative']:
 
 
 
 
 
186
  raise ValueError(f"Invalid hbeag_status value: {data['hbeag_status']}")
187
 
188
- if data['fibrosis_stage'] not in ['F0-F1', 'F2-F3', 'F4']:
 
 
 
 
 
 
 
 
 
189
  raise ValueError(f"Invalid fibrosis_stage value: {data['fibrosis_stage']}")
190
 
191
- if data['necroinflammatory_activity'] not in ['A0', 'A1', 'A2', 'A3']:
 
 
 
 
 
192
  raise ValueError(f"Invalid necroinflammatory_activity value: {data['necroinflammatory_activity']}")
193
 
194
  return data
 
172
  except (ValueError, TypeError) as e:
173
  raise ValueError(f"Invalid data type in extracted data: {str(e)}")
174
 
175
+ # Validate and normalize enum/string values to canonical forms
176
+ sex_raw = str(data['sex']).strip().lower()
177
+ if sex_raw in ['male', 'm', 'man', 'boy']:
178
+ data['sex'] = 'Male'
179
+ elif sex_raw in ['female', 'f', 'woman', 'girl']:
180
+ data['sex'] = 'Female'
181
+ else:
182
  raise ValueError(f"Invalid sex value: {data['sex']}")
183
 
184
+ preg_raw = str(data['pregnancy_status']).strip().lower()
185
+ if preg_raw in ['pregnant', 'yes', 'y']:
186
+ data['pregnancy_status'] = 'Pregnant'
187
+ elif preg_raw in ['not pregnant', 'non-pregnant', 'non pregnant', 'no', 'n', 'none']:
188
+ data['pregnancy_status'] = 'Not pregnant'
189
+ else:
190
  raise ValueError(f"Invalid pregnancy_status value: {data['pregnancy_status']}")
191
 
192
+ hbsag_raw = str(data['hbsag_status']).strip().lower()
193
+ if hbsag_raw in ['positive', 'pos', '+', 'reactive']:
194
+ data['hbsag_status'] = 'Positive'
195
+ elif hbsag_raw in ['negative', 'neg', '-', 'non-reactive', 'nonreactive']:
196
+ data['hbsag_status'] = 'Negative'
197
+ else:
198
  raise ValueError(f"Invalid hbsag_status value: {data['hbsag_status']}")
199
 
200
+ hbeag_raw = str(data['hbeag_status']).strip().lower()
201
+ if hbeag_raw in ['positive', 'pos', '+', 'reactive']:
202
+ data['hbeag_status'] = 'Positive'
203
+ elif hbeag_raw in ['negative', 'neg', '-', 'non-reactive', 'nonreactive']:
204
+ data['hbeag_status'] = 'Negative'
205
+ else:
206
  raise ValueError(f"Invalid hbeag_status value: {data['hbeag_status']}")
207
 
208
+ fibrosis_raw = str(data['fibrosis_stage']).strip().upper().replace(" ", "")
209
+ if fibrosis_raw in ['F0-F1', 'F2-F3', 'F4']:
210
+ data['fibrosis_stage'] = fibrosis_raw
211
+ elif fibrosis_raw in ['F0', 'F1']:
212
+ data['fibrosis_stage'] = 'F0-F1'
213
+ elif fibrosis_raw in ['F2', 'F3']:
214
+ data['fibrosis_stage'] = 'F2-F3'
215
+ elif fibrosis_raw == 'F4':
216
+ data['fibrosis_stage'] = 'F4'
217
+ else:
218
  raise ValueError(f"Invalid fibrosis_stage value: {data['fibrosis_stage']}")
219
 
220
+ necro_raw = str(data['necroinflammatory_activity']).strip().upper().replace(" ", "")
221
+ if necro_raw in ['A0', 'A1', 'A2', 'A3']:
222
+ data['necroinflammatory_activity'] = necro_raw
223
+ elif necro_raw in ['0', '1', '2', '3']:
224
+ data['necroinflammatory_activity'] = 'A' + necro_raw
225
+ else:
226
  raise ValueError(f"Invalid necroinflammatory_activity value: {data['necroinflammatory_activity']}")
227
 
228
  return data