HBV_AI_Assistant / test_assessment_fixed.py
moazx's picture
Update the Assessment Results
4a17bbc
raw
history blame
7.61 kB
import pandas as pd
import requests
import json
import re
from typing import Optional, List
def parse_hbsag_duration(hbsag_value: str) -> int:
    """Extract the HBsAg duration, in months, from an HBsAg status string.

    A parenthesized duration such as "Positive (36 months)" or
    "Positive (10 years)" is recognized; a duration given in years is
    converted to months. The unit is matched case-insensitively, so
    "(10 Years)" is parsed as well.

    Returns:
        The duration in months, or 6 when the value is missing or contains
        no parseable duration.
    """
    default_months = 6
    if pd.isna(hbsag_value):
        return default_months

    # Number + unit inside parentheses; IGNORECASE because the unit is
    # lowercased below anyway, so mixed-case input is clearly expected.
    match = re.search(
        r'\(\s*(\d+)\s*(months?|years?)\s*\)',
        str(hbsag_value),
        re.IGNORECASE,
    )
    if match is None:
        # e.g. just "Positive" with no duration
        return default_months

    amount = int(match.group(1))
    unit = match.group(2).lower()
    return amount * 12 if unit.startswith('year') else amount
def parse_status(value: str) -> str:
    """Normalize a status cell to exactly 'Positive' or 'Negative'.

    A value containing "positive" (case-insensitive) yields 'Positive'.
    Everything else, including a missing value, yields 'Negative'.
    """
    if not pd.isna(value) and 'positive' in str(value).lower():
        return "Positive"
    # Missing, explicitly negative, and unrecognized values all end up here.
    return "Negative"
def parse_sex(value: str) -> str:
    """Normalize a sex cell to exactly 'Male' or 'Female'.

    Accepts "m"/"male" and "f"/"female" in any letter case. Surrounding
    whitespace is ignored so that a padded cell such as " F " is not
    misread as the default.

    Returns:
        'Female' for a recognized female value; 'Male' for a recognized male
        value and also as the fallback for missing or unrecognized input.
    """
    if pd.isna(value):
        return "Male"

    normalized = str(value).strip().lower()
    if normalized in ('f', 'female'):
        return "Female"
    # 'm', 'male', and anything unrecognized fall back to "Male".
    return "Male"
def parse_pregnancy_status(sex: str, value: str) -> str:
    """Normalize a pregnancy cell to exactly 'Pregnant' or 'Not pregnant'.

    Args:
        sex: Already-normalized sex; 'Male' always yields 'Not pregnant'.
        value: Raw cell content, e.g. "Yes", "No", "Pregnant", "Not pregnant".

    Returns:
        'Pregnant' when the value affirms pregnancy ("yes" or "pregnant"
        without a negation), otherwise 'Not pregnant'. Missing values yield
        'Not pregnant'.
    """
    if sex == "Male" or pd.isna(value):
        return "Not pregnant"

    text = str(value).strip().lower()

    # Negations must be ruled out first: "not pregnant" and "non-pregnant"
    # both contain the substring "pregnant" and would otherwise be misread.
    negated = (
        re.match(r'no\b', text) is not None
        or re.search(r'\b(?:not|non|never)[\s-]*(?:\w+\s+)?pregnant', text) is not None
    )
    if negated:
        return "Not pregnant"

    if 'yes' in text or 'pregnant' in text:
        return "Pregnant"
    return "Not pregnant"
def parse_boolean(value: str) -> bool:
    """Convert a Yes/No style cell to a boolean.

    Returns True only when the text contains "yes" (case-insensitive);
    missing values and everything else return False.
    """
    if pd.isna(value):
        return False
    lowered = str(value).lower()
    return lowered.find('yes') != -1
def parse_fibrosis_stage(value: str) -> str:
    """Map a fibrosis/cirrhosis cell to 'F0-F1', 'F2-F3', or 'F4'.

    Matching is case-insensitive. An explicit "F4" or an un-negated mention
    of "cirrhosis" yields 'F4'; "F2" or "F3" yields 'F2-F3'. A negated
    mention such as "No cirrhosis" or "F1 (no cirrhosis)" is NOT treated as
    cirrhosis.

    Returns:
        The stage bucket; 'F0-F1' for missing, "N/A", or unrecognized input.
    """
    if pd.isna(value):
        return "F0-F1"

    text = str(value).upper()

    if 'F4' in text:
        return "F4"

    # A negation word earlier in the same clause ("NO CIRRHOSIS",
    # "WITHOUT EVIDENCE OF CIRRHOSIS") cancels the keyword match.
    cirrhosis_negated = re.search(
        r'\b(?:NO|NOT|NON|WITHOUT)\b[^,;.]*?CIRRHOSIS', text
    )
    if 'CIRRHOSIS' in text and cirrhosis_negated is None:
        return "F4"

    if 'F3' in text or 'F2' in text:
        return "F2-F3"

    # F0, F1, "N/A", and anything unrecognized share the lowest bucket.
    return "F0-F1"
def parse_necroinflammation(value: str) -> str:
    """Map a necroinflammation cell to 'A0', 'A1', 'A2', or 'A3'.

    Missing values and the literal "none" (any case) give 'A0'. Otherwise
    the text is scanned, most severe grade first, for either the grade code
    or its keyword. Text that matches nothing gives 'A1'.
    """
    if pd.isna(value):
        return "A0"
    if str(value).lower() == "none":
        return "A0"

    text = str(value).upper()

    # Ordered from most to least severe so the highest grade mentioned wins.
    grade_keywords = (
        ("A3", "SEVERE"),
        ("A2", "MODERATE"),
        ("A1", "MILD"),
        ("A0", "MINIMAL"),
    )
    for grade, keyword in grade_keywords:
        if grade in text or keyword in text:
            return grade
    return "A1"
def parse_immunosuppression(value: str) -> str:
    """Map an immunosuppressive-therapy cell to 'None', 'Chemotherapy', or 'Other'.

    Matching is case-insensitive and ignores surrounding whitespace.

    Returns:
        'Chemotherapy' when the text mentions "chemo"; 'None' for missing
        values, text containing "none", or a plain negative ("no", "n/a",
        "na", "nil", empty); 'Other' for any other text.
    """
    if pd.isna(value):
        return "None"

    text = str(value).strip().lower()

    if 'chemo' in text:
        return "Chemotherapy"
    # Plain negatives must not fall through to "Other", which would report
    # a patient on no therapy as immunosuppressed.
    if 'none' in text or text in ('', 'no', 'n/a', 'na', 'nil'):
        return "None"
    return "Other"
def parse_coinfections(value: str) -> List[str]:
    """Collect the coinfections (HCV, HIV, HDV) mentioned in a cell.

    Missing values and the literal "none" (any case) give an empty list.
    Otherwise each virus name found in the text (case-insensitive) is
    returned, always in the order HCV, HIV, HDV.
    """
    if pd.isna(value):
        return []
    if str(value).lower() == "none":
        return []

    text = str(value).upper()
    return [virus for virus in ("HCV", "HIV", "HDV") if virus in text]
def parse_comorbidities(value: str) -> Optional[List[str]]:
    """Wrap a comorbidities cell in a single-element list.

    Returns None for a missing value or the literal "none" (any case);
    otherwise a list holding the cell's text unchanged as its only item.
    """
    text = None if pd.isna(value) else str(value)
    if text is None or text.lower() == "none":
        return None
    return [text]
def create_api_payload(row: pd.Series) -> dict:
    """Build the assessment request payload from one CSV row.

    Each field is read from its named column and normalized by the matching
    parse_* helper; age is cast to int, and HBV DNA and ALT to float. The
    HBsAg column feeds both the status and the duration field.
    """
    # Sex is resolved first because the pregnancy field depends on it.
    sex = parse_sex(row['Sex'])

    payload = {"sex": sex}
    payload["age"] = int(row['Age'])
    payload["pregnancy_status"] = parse_pregnancy_status(sex, row['Pregnancy Status'])
    payload["hbsag_status"] = parse_status(row['HBsAg'])
    payload["duration_hbsag_months"] = parse_hbsag_duration(row['HBsAg'])
    payload["hbv_dna_level"] = float(row['HBV DNA (IU/mL)'])
    payload["hbeag_status"] = parse_status(row['HBeAg'])
    payload["alt_level"] = float(row['ALT (U/L)'])
    payload["fibrosis_stage"] = parse_fibrosis_stage(row['Fibrosis/Cirrhosis Stage'])
    payload["necroinflammatory_activity"] = parse_necroinflammation(row['Necroinflammation'])
    payload["extrahepatic_manifestations"] = parse_boolean(row['Extrahepatic Manifestations'])
    payload["immunosuppression_status"] = parse_immunosuppression(row['Immunosuppressive Therapy'])
    payload["coinfections"] = parse_coinfections(row['Coinfections'])
    payload["family_history_cirrhosis_hcc"] = parse_boolean(row['Family History of HCC/Cirrhosis'])
    payload["other_comorbidities"] = parse_comorbidities(row['Comorbidities'])
    return payload
def assess_case(payload: dict, api_url: str) -> dict:
    """POST one case to the assessment API and return the decoded JSON reply.

    Args:
        payload: JSON-serializable request body.
        api_url: Endpoint to POST to.

    Returns:
        The decoded JSON response on success. On an HTTP error status, a
        transport failure, or a response body that is not valid JSON, the
        problem is printed and
        ``{"eligible": None, "recommendations": "Error: ..."}`` is returned
        instead of raising.
    """
    try:
        response = requests.post(
            api_url,
            json=payload,
            headers={'Content-Type': 'application/json'},
            timeout=30
        )
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        # `response` is always bound here: HTTPError is raised by
        # raise_for_status(), i.e. after the POST itself succeeded.
        try:
            error_detail = response.json()
            print(f"API Error Details: {json.dumps(error_detail, indent=2)}")
        except ValueError:
            # The error body was not JSON; fall back to the exception text.
            print(f"API Error: {e}")
        return {"eligible": None, "recommendations": f"Error: {str(e)}"}
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON success body on requests versions
        # whose JSON decode error is not a RequestException subclass.
        print(f"API Error: {e}")
        return {"eligible": None, "recommendations": f"Error: {str(e)}"}
def main():
    """Run every case in the input CSV through the API and save the results.

    Reads the test-case CSV, builds a payload per row, calls the assessment
    endpoint, stores the returned 'eligible' and 'recommendations' values in
    the 'Eligibility' and 'Rationale' columns, writes the augmented table to
    the output CSV, and prints a summary.

    Cases whose eligibility is None are reported on their own line instead
    of being counted as "Not eligible". assess_case returns None for failed
    API calls.
    """
    # Configuration
    input_file = r"D:\Work\HBV AI Assistant\data\HBV_Eligibility_TestCases - To Be Tested(Sheet1).csv"
    output_file = "HBV_Eligibility_Results.csv"
    api_url = "https://moazx-hbv-ai-assistant.hf.space/assess"

    # Read CSV
    print(f"Reading {input_file}...")
    df = pd.read_csv(input_file, encoding='windows-1252')

    # Add columns for results
    df['Eligibility'] = None
    df['Rationale'] = None

    # Process each case
    print(f"\nProcessing {len(df)} cases...")
    for idx, row in df.iterrows():
        case_id = row['Case ID']
        print(f"\nProcessing {case_id}...")

        # Create payload
        payload = create_api_payload(row)
        print(f"Payload: {json.dumps(payload, indent=2)}")

        # Call API
        result = assess_case(payload, api_url)
        print(f"Result: {result}")

        # Update dataframe
        df.at[idx, 'Eligibility'] = result.get('eligible')
        df.at[idx, 'Rationale'] = result.get('recommendations', '')

    # Save results
    print(f"\nSaving results to {output_file}...")
    df.to_csv(output_file, index=False)
    print("Done!")

    # Print summary. Count the three outcomes separately: subtracting the
    # eligible count from the total would report failed calls as
    # "Not eligible".
    # NOTE(review): assumes the API returns 'eligible' as a boolean - confirm.
    eligibility = df['Eligibility']
    eligible_count = int(eligibility.eq(True).sum())
    not_eligible_count = int(eligibility.eq(False).sum())
    undetermined_count = int(eligibility.isna().sum())
    print(f"\nSummary:")
    print(f"Total cases: {len(df)}")
    print(f"Eligible: {eligible_count}")
    print(f"Not eligible: {not_eligible_count}")
    print(f"Undetermined / errors: {undetermined_count}")


if __name__ == "__main__":
    main()