HBV_AI_Assistant / core /hbv_assessment.py
moazx's picture
Update the HBV with context 2
79e52d8
"""
HBV Patient Assessment Module
Evaluates patient eligibility for HBV treatment according to SASLT 2021 guidelines
"""
import logging
import json
import re
from typing import Dict, Any
from .config import get_llm
logger = logging.getLogger(__name__)
def clean_json_string(json_str: str) -> str:
    """
    Escape raw control characters that appear inside JSON string values.

    LLM output frequently contains literal newlines, tabs, etc. inside quoted
    values, which ``json.loads`` rejects. The text is scanned once while
    tracking whether the cursor is between double quotes; control characters
    found inside a quoted value are replaced by their JSON escape sequences,
    while whitespace between tokens is left untouched.

    Args:
        json_str: Raw JSON text that may contain unescaped control characters

    Returns:
        The same text with control characters inside string values escaped
    """
    # Control characters that have a short, named JSON escape.
    named_escapes = {
        '\n': '\\n',
        '\r': '\\r',
        '\t': '\\t',
        '\b': '\\b',
        '\f': '\\f',
    }

    pieces = []
    inside_string = False
    pending_escape = False

    for ch in json_str:
        if pending_escape:
            # Character following a backslash: copy verbatim so that an
            # escaped quote (\") does not toggle the in-string state.
            pieces.append(ch)
            pending_escape = False
        elif ch == '\\':
            pieces.append(ch)
            pending_escape = True
        elif ch == '"':
            inside_string = not inside_string
            pieces.append(ch)
        elif not inside_string:
            # Structural text between tokens is passed through unchanged.
            pieces.append(ch)
        elif ch in named_escapes:
            pieces.append(named_escapes[ch])
        elif ord(ch) < 32:
            # Remaining control characters use the generic \uXXXX form.
            pieces.append(f'\\u{ord(ch):04x}')
        else:
            pieces.append(ch)

    return ''.join(pieces)
# SASLT 2021 Guidelines - Hardcoded Page Contents
# This text is interpolated verbatim into the LLM prompt. Each section header
# carries a "[SASLT 2021, p. X]" marker giving the source page(s).
# Bullets, arrows and the >= sign are stored as real Unicode characters
# (previously mis-decoded UTF-8 sequences).
SASLT_GUIDELINES = """
===== TREATMENT RECOMMENDATIONS =====
### 1. INITIATION OF TREATMENT [SASLT 2021, p. 6]
• Treatment indications should also take into account patient's age, health status, risk of HBV transmission, family history of HCC or cirrhosis and extrahepatic manifestations
• All patients with chronic hepatitis B (HBV DNA > 2,000 IU/mL, ALT > ULN), regardless of HBeAg status, and/or at least moderate liver necroinflammation or fibrosis (Grade A)
• Patients with cirrhosis (compensated or decompensated), with any detectable HBV DNA level and regardless of ALT levels (Grade A)
• Patients with HBV DNA > 20,000 IU/mL and ALT > 2xULN, regardless of the degree of fibrosis (Grade B)
• Patients with HBeAg-positive chronic HBV infection (persistently normal ALT and high HBV DNA levels) may be treated if they are > 30 years, regardless of the severity of liver histological lesions (Grade D)
• Patients with chronic HBV infection (HBV DNA > 2,000 IU/mL, ALT > ULN), regardless of HBeAg status, and a family history of HCC or cirrhosis and extrahepatic manifestations (Grade D)
### 2. MANAGEMENT ALGORITHM [SASLT 2021, p. 6]
• HBsAg positive with chronic HBV infection and no signs of chronic hepatitis → Monitor (HBsAg, HBeAg, HBV DNA, ALT, fibrosis assessment). Consider: risk of HCC, risk of HBV reactivation, extrahepatic manifestations, risk of HBV transmission
• CHB (with/without cirrhosis) → Start antiviral treatment if indicated, otherwise return to monitoring
• HBsAg negative, anti-HBc positive → No specialist follow-up (inform about HBV reactivation risk). In case of immunosuppression: start oral antiviral prophylaxis or monitor
### 3. MONITORING OF UNTREATED PATIENTS [SASLT 2021, p. 6-7]
• Patients with HBeAg-positive chronic HBV infection who are younger than 30 years should be followed at least every 3-6 months (Grade B)
• Patients with HBeAg-negative chronic HBV infection and serum HBV DNA <2,000 IU/ml should be followed every 6-12 months (Grade B)
• Patients with HBeAg-negative chronic HBV infection and serum HBV DNA ≥2,000 IU/ml should be followed every 3 months for the first year and thereafter every 6 months (Grade D)
### 4. CHRONIC HEPATITIS B (CHB) TREATMENT [SASLT 2021, p. 7-8]
• The treatment of choice is the long-term administration of a potent nucleos(t)ide analogue NA with a high barrier to resistance, regardless of the severity of liver disease (Grade A)
• Preferred regimens are ETV, TDF and TAF as monotherapies (Grade A)
• LAM, ADV and TBV are not recommended in the treatment of CHB (Grade A)
### 5. HBV-HCV COINFECTION [SASLT 2021, p. 8-9]
• Treatment of HCV through DAAs may lead to reactivation of HBV. Patients who meet the criteria for HBV treatment should be treated concurrently or before initiation of DAA (Grade A)
• HBV DNA and ALT should be monitored every four to eight weeks while on DAA and three months after completion of therapy (Grade D)
• ALT level should be monitored every four weeks while on DAA for patients who are HBsAg-negative but HBcAb-positive. If ALT starts to rise, HBsAg and HBV DNA must be obtained to determine the need to start HBV treatment (Grade D)
### 6. HBV-HDV COINFECTION [SASLT 2021, p. 9]
• HDV is a defective virus that requires HBsAg to envelop its delta antigen, causing coinfection with HBV or superinfection in chronic HBV patients
• Active HDV infection is defined by HDV IgM and RNA presence with unexplained LFT elevation
• Treatment goal: Suppression of HDV replication
• PEG-IFN for 1 year shows long-term benefits despite post-treatment viral relapse
• NA monotherapy is ineffective against HDV replication
### 7. HBV-HIV COINFECTION [SASLT 2021, p. 9]
• All HIV-positive patients with HBV co-infection should start ART irrespective of CD4 cell count (Grade A)
• HBV-HIV co-infected patients should be treated with TDF- or TAF-based ART regimen (Grade A)
### 8. IMMUNOCOMPROMISED PATIENTS [SASLT 2021, p. 9]
• Prophylaxis for all HBsAg-positive patients before chemotherapy or immunosuppressive therapy (Grade A)
• HBsAg-negative/anti-HBc-positive patients need HBV prophylaxis if receiving anti-CD20 or stem cell transplantation
• Continue prophylaxis for ≥6 months after immunosuppression (12 months for anti-CD20)
### 9. PREGNANCY [SASLT 2021, p. 9-10]
• Screen all pregnant women for HBV in first trimester (Grade A)
• HBV vaccine is safe in pregnancy for non-immune women without chronic HBV
• Treat pregnant women meeting standard therapy indications
• Start antiviral prophylaxis with TDF (or TAF) for HBV DNA >100,000 IU/mL at 24-28 weeks (Grade D)
• Switch to TDF/TAF if on ETV, ADV, or interferon during pregnancy (Grade D)
• Delivery mode based on obstetric indications only
• Breastfeeding permitted for HBsAg+ women on TDF (Grade B)
----
"""
def _normalize_label(value: Any) -> str:
    """Return *value* as stripped, lower-cased text for tolerant comparisons."""
    return str(value).strip().lower()


def _format_quantity(value: Any) -> str:
    """Format a lab value with thousands separators; never raise on odd input."""
    if value is None:
        return "N/A"
    try:
        return f"{value:,.0f}"
    except (TypeError, ValueError):
        pass
    try:
        # Numeric strings such as "25000" (e.g. from a web form).
        return f"{float(value):,.0f}"
    except (TypeError, ValueError):
        # Keep whatever was supplied rather than hiding it from the model.
        return str(value)


def _join_items(items: Any) -> str:
    """Render a list-like field as comma-separated text, or 'None' when empty."""
    if not items:
        return 'None'
    if isinstance(items, str):
        # A bare string must not be split into individual characters.
        return items
    return ', '.join(str(item) for item in items)


def _strip_code_fence(text: str) -> str:
    """Return the content of the first markdown code fence, or *text* if none."""
    for fence in ('```json', '```'):
        start = text.find(fence)
        if start == -1:
            continue
        start += len(fence)
        end = text.find('```', start)
        if end == -1:
            # Unclosed fence: take everything after the opener instead of
            # slicing with -1, which would drop the final character.
            end = len(text)
        return text[start:end].strip()
    return text


def _parse_llm_json(response_text: str) -> Dict[str, Any]:
    """Extract and decode the JSON object embedded in an LLM reply.

    Raises:
        ValueError: if no ``{...}`` span exists or it is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    text = _strip_code_fence(response_text)
    # Tolerate extra prose around the object by slicing first '{' to last '}'.
    start = text.find('{')
    end = text.rfind('}') + 1
    if start < 0 or end <= start:
        raise ValueError("No JSON found in response")
    cleaned = clean_json_string(text[start:end])
    logger.debug("Cleaned JSON string (first 500 chars): %s", cleaned[:500])
    return json.loads(cleaned)


def _coerce_eligible(value: Any) -> bool:
    """Convert the model's ``eligible`` field to a real bool."""
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        # A quoted "false" would otherwise be truthy.
        return value.strip().lower() in ("true", "yes")
    return bool(value)


def _log_section(title: str, *lines: str) -> None:
    """Log *lines* at INFO level framed by an 80-character banner."""
    rule = '=' * 80
    logger.info("\n%s", rule)
    logger.info("%s", title)
    logger.info("%s", rule)
    for line in lines:
        logger.info("%s", line)
    logger.info("%s\n", rule)


def _build_analysis_prompt(patient_data: Dict[str, Any]) -> str:
    """Build the LLM prompt from patient fields and the hardcoded guidelines."""
    sex = patient_data.get("sex", "Male")
    # ALT upper limit of normal used for context: 35 for male, 25 otherwise.
    alt_uln = 35 if _normalize_label(sex) == "male" else 25

    age = patient_data.get("age", "N/A")
    pregnancy_status = patient_data.get("pregnancy_status", "N/A")
    hbsag_status = patient_data.get("hbsag_status", "N/A")
    duration_hbsag = patient_data.get("duration_hbsag_months", "N/A")
    hbv_dna = _format_quantity(patient_data.get("hbv_dna_level", 0))
    hbeag_status = patient_data.get("hbeag_status", "N/A")
    alt_level = patient_data.get("alt_level", 0)
    fibrosis_stage = patient_data.get("fibrosis_stage", "N/A")
    necroinflammatory = patient_data.get("necroinflammatory_activity", "N/A")
    extrahepatic = patient_data.get("extrahepatic_manifestations", False)
    immunosuppression = patient_data.get("immunosuppression_status", "None")
    coinfections = _join_items(patient_data.get("coinfections", []))
    family_history = patient_data.get("family_history_cirrhosis_hcc", False)
    comorbidities = _join_items(patient_data.get("other_comorbidities", []))

    # The citation rule and the worked example refer to the
    # "[SASLT 2021, p. X]" markers and to page numbers that actually occur in
    # SASLT_GUIDELINES, so the model is not nudged toward citing pages that
    # were never supplied.
    return f"""You are an HBV treatment eligibility assessment system. Analyze the patient data against SASLT 2021 guidelines.
PATIENT DATA:
- Sex: {sex}
- Age: {age} years
- Pregnancy Status: {pregnancy_status}
- HBsAg Status: {hbsag_status}
- HBsAg Duration: {duration_hbsag} months
- HBV DNA Level: {hbv_dna} IU/mL
- HBeAg Status: {hbeag_status}
- ALT Level: {alt_level} U/L (ULN: {alt_uln} U/L)
- Fibrosis Stage: {fibrosis_stage}
- Necroinflammatory Activity: {necroinflammatory}
- Extrahepatic Manifestations: {extrahepatic}
- Immunosuppression: {immunosuppression}
- Coinfections: {coinfections}
- Family History (Cirrhosis/HCC): {family_history}
- Other Comorbidities: {comorbidities}
SASLT 2021 GUIDELINES (Retrieved Context):
{SASLT_GUIDELINES}
Based STRICTLY on the SASLT 2021 guidelines and criteria provided above, assess this patient's eligibility for HBV antiviral treatment.
You MUST respond with a valid JSON object in this exact format:
{{
"eligible": true or false,
"recommendations": "Comprehensive assessment with inline citations"
}}
IMPORTANT JSON FORMATTING:
- Return ONLY valid JSON without markdown code blocks
- Use spaces instead of literal newlines within the "recommendations" string
- Separate paragraphs with double spaces or use \\n for line breaks
- Do NOT include literal newline characters in the JSON string values
CRITICAL CITATION REQUIREMENTS:
1. The "recommendations" field must be a comprehensive narrative that includes:
- Eligibility determination with rationale
- Specific criteria met or not met from the guidelines
- Treatment options if eligible (ETV, TDF, TAF as first-line agents)
- Special considerations (pregnancy, immunosuppression, coinfections, etc.)
- Any additional clinical notes
- **References** section at the end listing all cited pages
2. EVERY statement in recommendations MUST include inline citations in this format:
"[SASLT 2021, Page X]" where X is the specific page number
3. Example format:
"Patient meets treatment criteria based on HBV DNA > 2,000 IU/mL, ALT > ULN, and moderate fibrosis (Grade A) [SASLT 2021, Page 6]. First-line antiviral agents including entecavir (ETV), tenofovir disoproxil fumarate (TDF), or tenofovir alafenamide (TAF) are recommended [SASLT 2021, Page 7]. Patient should be monitored for treatment response [SASLT 2021, Page 7].
**References**
SASLT 2021 Guidelines - Pages: 6, 7
(Treatment Eligibility Criteria, First-Line Antiviral Agents, Monitoring Protocols)"
4. ALWAYS cite the specific page number from the [SASLT 2021, p. X] markers in the guidelines above
5. Include evidence grade (Grade A, B, C, D) when available in the guidelines
6. END the recommendations with a **References** section that lists all cited pages in ascending order with brief description of topics covered
IMPORTANT:
1. Base your assessment ONLY on the SASLT 2021 guidelines provided
2. Make recommendations comprehensive and detailed
3. Cite page numbers after EVERY clinical statement or recommendation
4. Use the format [SASLT 2021, Page X] for all citations
5. Include a **References** section at the end listing all pages cited
6. Return ONLY the JSON object, no additional text
"""


def assess_hbv_eligibility(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Assess patient eligibility for HBV treatment based on SASLT 2021 guidelines
    using the hardcoded guideline excerpt (SASLT_GUIDELINES) and LLM analysis.

    Patients whose ``hbsag_status`` is not "Positive" (compared ignoring case
    and surrounding whitespace) are rejected without calling the LLM. For all
    others a prompt is built from the patient fields, sent through
    ``get_llm().invoke``, and the JSON object in the reply is parsed.

    Args:
        patient_data: Dictionary containing patient clinical parameters

    Returns:
        Dictionary with assessment results:
        - eligible: bool
        - recommendations: str (comprehensive narrative with inline citations
          in format [SASLT 2021, Page X]). If the LLM reply cannot be parsed
          as JSON, ``eligible`` is False and this field holds an error message.

    Raises:
        Exception: any error other than a JSON parsing failure is logged and
            re-raised unchanged.
    """
    try:
        # Check if HBsAg is positive (required for treatment consideration).
        # Case/whitespace-insensitive so "positive" is not silently rejected.
        if _normalize_label(patient_data.get("hbsag_status")) != "positive":
            return {
                "eligible": False,
                "recommendations": "Patient is HBsAg negative. HBV treatment is not indicated. HBsAg positivity is required for HBV treatment consideration according to SASLT 2021 guidelines."
            }

        # Use hardcoded SASLT 2021 guidelines instead of RAG retrieval
        logger.info("Using hardcoded SASLT 2021 guidelines (Pages 3, 4, 6, 7, 8, 9, 10)")

        analysis_prompt = _build_analysis_prompt(patient_data)

        # NOTE(review): the prompt and the reply contain patient clinical data
        # and are written to the log at INFO level - confirm this is
        # acceptable for the deployment's logging/privacy policy.
        _log_section("LLM PROMPT", f"\n{analysis_prompt}\n")

        llm = get_llm()
        logger.info("Sending prompt to LLM...")
        response = llm.invoke(analysis_prompt)
        logger.info("LLM response received")

        # Some LLM wrappers return an object with .content, others plain text.
        response_text = response.content if hasattr(response, 'content') else str(response)
        if not isinstance(response_text, str):
            response_text = str(response_text)

        _log_section("LLM RESPONSE", f"\n{response_text}\n")

        try:
            result = _parse_llm_json(response_text)
        except ValueError as e:
            logger.error("Failed to parse LLM response as JSON: %s", e)
            logger.error("Response text: %s", response_text)
            # Fallback: return error response
            return {
                "eligible": False,
                "recommendations": f"Error parsing assessment results. Please try again. Error details: {str(e)}"
            }
        logger.info("✅ Successfully parsed JSON response")

        # Validate and normalise the fields callers rely on.
        recommendations = result.get("recommendations", "")
        if recommendations is None:
            recommendations = ""
        elif not isinstance(recommendations, str):
            recommendations = str(recommendations)
        assessment_result = {
            "eligible": _coerce_eligible(result.get("eligible", False)),
            "recommendations": recommendations
        }

        _log_section(
            "FINAL ASSESSMENT",
            f"Eligible: {assessment_result['eligible']}",
            f"Recommendations length: {len(assessment_result['recommendations'])} characters",
        )
        return assessment_result
    except Exception as e:
        # Boundary handler: record the traceback, then let the caller decide.
        logger.exception("Error in assess_hbv_eligibility: %s", e)
        raise