HBV_AI_Assistant / core /hbv_assessment.py
moazx's picture
Implement the core eligibility logic natively in Python.
22770b7
raw
history blame
28.5 kB
"""
HBV Patient Assessment Module
Evaluates patient eligibility for HBV treatment according to SASLT 2021 guidelines
"""
import logging
import json
import re
from typing import Dict, Any, Tuple
from .config import get_llm
logger = logging.getLogger(__name__)
def clean_json_string(json_str: str) -> str:
    """
    Escape raw control characters that appear inside JSON string values.

    LLM output frequently contains literal newlines, tabs, etc. inside quoted
    values, which ``json.loads`` rejects. This walks the text once, tracking
    whether the cursor is inside a double-quoted string, and rewrites control
    characters only there. Whitespace between tokens is left untouched.

    Args:
        json_str: Raw JSON text that may contain unescaped control characters.

    Returns:
        The same text with control characters inside string values replaced by
        their JSON escape sequences.
    """
    # Control characters that have a short JSON escape form.
    short_escapes = {
        '\n': '\\n',
        '\r': '\\r',
        '\t': '\\t',
        '\b': '\\b',
        '\f': '\\f',
    }

    pieces = []
    inside_string = False
    pending_escape = False

    for ch in json_str:
        if pending_escape:
            # Character following a backslash is copied verbatim.
            pieces.append(ch)
            pending_escape = False
        elif ch == '\\':
            pieces.append(ch)
            pending_escape = True
        elif ch == '"':
            # An unescaped quote opens or closes a string value.
            inside_string = not inside_string
            pieces.append(ch)
        elif inside_string and ch in short_escapes:
            pieces.append(short_escapes[ch])
        elif inside_string and ord(ch) < 32:
            # Remaining control characters use the generic \uXXXX form.
            pieces.append(f'\\u{ord(ch):04x}')
        else:
            pieces.append(ch)

    return ''.join(pieces)
def normalize_recommendations(text: str, *, max_len: int = 1800) -> str:
    """
    Normalize the recommendations string for concise, consistent formatting.

    - Converts all newline styles to ``\\n``
    - Strips leading/trailing whitespace on each line and drops empty lines
    - Inserts a single blank line before each known section header
      (except when the header is the very first line)
    - Truncates the result to ``max_len`` characters

    Args:
        text: Raw recommendations text (typically produced by the LLM).
        max_len: Maximum length of the returned string. The text is cut at
            this many characters and trailing whitespace is removed. A
            non-positive value disables the cap.

    Returns:
        The normalized text, or an empty string when ``text`` is empty/None.
    """
    if not text:
        return ""

    # Only these exact section titles get a separating blank line; a line is
    # treated as a header when it *starts with* one of them.
    section_headers = (
        'Eligibility and Rationale:',
        'Treatment Recommendations:',
        'Monitoring and Follow-up:',
        'Special Considerations:',
        'References:',
    )

    # Normalize newlines, then strip every line and discard the empty ones.
    unified = text.replace('\r\n', '\n').replace('\r', '\n')
    lines = [ln for ln in (raw.strip() for raw in unified.split('\n')) if ln]

    formatted_lines = []
    for index, line in enumerate(lines):
        # str.startswith accepts a tuple, so one call covers every header.
        if index > 0 and line.startswith(section_headers):
            formatted_lines.append('')
        formatted_lines.append(line)

    normalized = '\n'.join(formatted_lines)

    # Cap the length; short content is returned unchanged.
    if 0 < max_len < len(normalized):
        normalized = normalized[:max_len].rstrip()
    return normalized
# SASLT 2021 Guidelines - Hardcoded Page Contents
# This text is interpolated verbatim into the LLM prompt built by
# assess_hbv_eligibility, so any edit here changes what the model is shown.
# The ULN value stated under KEY DEFINITIONS is the one that
# check_eligibility_criteria hardcodes (ULN = 40); keep the two in sync.
SASLT_GUIDELINES = """
===== SASLT 2021 GUIDELINES: TREATMENT & MANAGEMENT =====
### 1. INITIATION OF TREATMENT [SASLT 2021, p. 6]
• Treatment indications should also take into account patient's age, health status, risk of HBV transmission, family history of HCC or cirrhosis and extrahepatic manifestations [SASLT 2021, p. 6]
• All patients with chronic hepatitis B (HBV DNA > 2,000 IU/mL, ALT > ULN), regardless of HBeAg status, and/or at least moderate liver necroinflammation or fibrosis (Grade A) [SASLT 2021, p. 6]
• Patients with cirrhosis (compensated or decompensated), with any detectable HBV DNA level and regardless of ALT levels (Grade A) [SASLT 2021, p. 6]
• Patients with HBV DNA > 20,000 IU/mL and ALT > 2xULN, regardless of the degree of fibrosis (Grade B) [SASLT 2021, p. 6]
• Patients with HBeAg-positive chronic HBV infection (persistently normal ALT and high HBV DNA levels) may be treated if they are > 30 years, regardless of the severity of liver histological lesions (Grade D) [SASLT 2021, p. 6]
• Patients with chronic HBV infection (HBV DNA > 2,000 IU/mL, ALT > ULN), regardless of HBeAg status, and a family history of HCC or cirrhosis and extrahepatic manifestations (Grade D) [SASLT 2021, p. 6]
• Non‑cirrhotic patients should be considered for treatment if they have HBV DNA levels >2,000 IU/mL, serum ALT >~40 IU/L and severity of liver disease assessed by liver biopsy showing at least moderate necroinflammation and/or at least moderate fibrosis.
Patients with HBV DNA greater than 20,000 IU/mL and ALT greater than 2x ULN can begin treatment without a liver biopsy.
Patients with HBV DNA >2,000 IU/mL and at least moderate fibrosis may initiate treatment even if ALT levels are normal.
### 2. MANAGEMENT ALGORITHM [SASLT 2021, p. 6]
• HBsAg positive with chronic HBV infection and no signs of chronic hepatitis → Monitor (HBsAg, HBeAg, HBV DNA, ALT, fibrosis assessment). Consider: risk of HCC, risk of HBV reactivation, extrahepatic manifestations, risk of HBV transmission [SASLT 2021, p. 6]
• CHB (with/without cirrhosis) → Start antiviral treatment if indicated, otherwise return to monitoring [SASLT 2021, p. 6]
• HBsAg negative, anti-HBc positive → No specialist follow-up (inform about HBV reactivation risk). In case of immunosuppression: start oral antiviral prophylaxis or monitor [SASLT 2021, p. 6]
### 3. MONITORING OF UNTREATED PATIENTS [SASLT 2021, p. 6-7]
• Patients with HBeAg-positive chronic HBV infection who are younger than 30 years should be followed at least every 3-6 months (Grade B) [SASLT 2021, p. 7]
• Patients with HBeAg-negative chronic HBV infection and serum HBV DNA <2,000 IU/ml should be followed every 6-12 months (Grade B) [SASLT 2021, p. 7]
• Patients with HBeAg-negative chronic HBV infection and serum HBV DNA ≥2,000 IU/ml should be followed every 3 months for the first year and thereafter every 6 months (Grade D) [SASLT 2021, p. 7]
### 4. CHRONIC HEPATITIS B (CHB) TREATMENT [SASLT 2021, p. 7-8]
• The treatment of choice is the long-term administration of a potent nucleos(t)ide analogue NA with a high barrier to resistance, regardless of the severity of liver disease (Grade A) [SASLT 2021, p. 8]
• Preferred regimens are ETV, TDF and TAF as monotherapies (Grade A) [SASLT 2021, p. 8]
• LAM, ADV and TBV are not recommended in the treatment of CHB (Grade A) [SASLT 2021, p. 8]
• TAF has demonstrated superior renal and bone density safety profiles compared with TDF in head-to-head trials [SASLT 2021, p. 8]
• International guidelines recommend switching individuals at high risk for bone or renal disease from TDF to either TAF or ETV [SASLT 2021, p. 8]
• TAF maintains a better safety profile unless the patient's creatinine clearance (CrCl) is less than 15 mL/minute [SASLT 2021, p. 8]
### 5. HBV-HCV COINFECTION [SASLT 2021, p. 8-9]
• Treatment of HCV through DAAs may lead to reactivation of HBV. Patients who meet the criteria for HBV treatment should be treated concurrently or before initiation of DAA (Grade A) [SASLT 2021, p. 9]
• HBV DNA and ALT should be monitored every four to eight weeks while on DAA and three months after completion of therapy (Grade D) [SASLT 2021, p. 9]
• ALT level should be monitored every four weeks while on DAA for patients who are HBsAg-negative but HBcAb-positive. If ALT starts to rise, HBsAg and HBV DNA must be obtained to determine the need to start HBV treatment (Grade D) [SASLT 2021, p. 9]
### 6. HBV-HDV COINFECTION [SASLT 2021, p. 9]
• HDV is a defective virus that requires HBsAg to envelop its delta antigen, causing coinfection with HBV or superinfection in chronic HBV patients [SASLT 2021, p. 9]
• Active HDV infection is defined by HDV IgM and RNA presence with unexplained LFT elevation [SASLT 2021, p. 9]
• Treatment goal: Suppression of HDV replication [SASLT 2021, p. 9]
• PEG-IFN for 1 year shows long-term benefits despite post-treatment viral relapse [SASLT 2021, p. 9]
• NA monotherapy is ineffective against HDV replication [SASLT 2021, p. 9]
### 7. HBV-HIV COINFECTION [SASLT 2021, p. 9]
• All HIV-positive patients with HBV co-infection should start ART irrespective of CD4 cell count (Grade A) [SASLT 2021, p. 9]
• HBV-HIV co-infected patients should be treated with TDF- or TAF-based ART regimen (Grade A) [SASLT 2021, p. 9]
### 8. IMMUNOCOMPROMISED PATIENTS [SASLT 2021, p. 9]
• Prophylaxis for all HBsAg-positive patients before chemotherapy or immunosuppressive therapy (Grade A) [SASLT 2021, p. 9]
• HBsAg-negative/anti-HBc-positive patients need HBV prophylaxis if receiving anti-CD20 or stem cell transplantation [SASLT 2021, p. 9]
• Continue prophylaxis for ≥6 months after immunosuppression (12 months for anti-CD20) [SASLT 2021, p. 9]
• All patients undergoing immunosuppressive treatment or chemotherapy, even short‑term courses, should be screened for HBsAg, anti‑HBc, and anti‑HBs (and HBV DNA, if HBsAg is already positive). [SASLT 2021, p. 9]
• We recommend prophylaxis for all patients with positive HBsAg before initiating chemotherapy or other immunosuppressive agents. [SASLT 2021, p. 9]
• For HBsAg-negative and anti-HBc positive patients, we recommend HBV prophylaxis if they are candidates for anti CD20 or are undergoing stem cell transplantation. [SASLT 2021, p. 9]
• We recommend starting HBV prophylaxis for HBsAg or anti‑HBc positive patients undergoing treatment with tumor necrosis factor (TNF) inhibitors. [SASLT 2021, p. 9]
• We recommend HBV prophylaxis for all patients who are HBsAg or anti-HBc positive before initiation of immunotherapy such as anti‑programmed cell death (PD) ‑1 and anti‑programmed cell death‑ligand 1 (PD‑L1) therapy. [SASLT 2021, p. 9]
### 9. PREGNANCY [SASLT 2021, p. 9-10]
• Screen all pregnant women for HBV in first trimester (Grade A) [SASLT 2021, p. 9]
• HBV vaccine is safe in pregnancy for non-immune women without chronic HBV. [SASLT 2021, p. 9]
• Treat pregnant women meeting standard therapy indications [SASLT 2021, p. 9]
• Start antiviral prophylaxis with TDF (or TAF) for HBV DNA >100,000 IU/mL at 24-28 weeks (Grade D) [SASLT 2021, p. 10]
• Switch to TDF/TAF if on ETV, ADV, or interferon during pregnancy (Grade D) [SASLT 2021, p. 10]
• Delivery mode based on obstetric indications only [SASLT 2021, p. 10]
• Breastfeeding permitted for HBsAg+ women on TDF (Grade B) [SASLT 2021, p. 10]
### KEY DEFINITIONS & CONTEXT
* **ALT (Alanine Aminotransferase):** Normal range in Adults falls between 4-42 units/L (U/L).
* **ULN (Upper Limit of Normal):** ULN means Upper Limit of Normal for ALT which is set at 40 units/L for the purpose of these guidelines.
* ** Necroinflammatory activity scale; A1 = mild, A2 = moderate, A3 = severe
Liver fibrosis is usually classified into five stages:
F0—no fibrosis; F1—mild fibrosis, pericellular collagen deposits; F2—moderate fibrosis, beginning bridging fibrosis; F3—severe fibrosis, defined as presence of numerous bridges and septa; F4—cirrhosis
"""
def check_eligibility_criteria(patient_data: Dict[str, Any]) -> Tuple[bool, str]:
    """
    Deterministically check patient eligibility based on SASLT 2021 guidelines.

    Rules are evaluated in a fixed order and the first match wins:
    HBsAg gate, then special populations (immunosuppression, HIV
    coinfection, pregnancy with high viremia), then the standard criteria
    (cirrhosis, severe viremia/ALT, active CHB, HBeAg-positive > 30 years,
    viremia with fibrosis, family history/extrahepatic manifestations).

    Args:
        patient_data: Patient parameters. Keys read here: ``hbsag_status``,
            ``hbv_dna_level_numeric``, ``hbeag_status``, ``alt_level``,
            ``fibrosis_stage``, ``necroinflammatory_activity``, ``age``,
            ``extrahepatic_manifestations``, ``immunosuppression_status``,
            ``coinfections``, ``family_history_cirrhosis_hcc`` and
            ``pregnancy_status``. Missing keys and explicit ``None`` values
            for the numeric/list fields fall back to neutral defaults.

    Returns:
        ``(is_eligible, rationale_message)`` where the rationale is a single
        sentence with an inline ``[SASLT 2021, Page X]`` citation.
    """
    hbsag_status = patient_data.get("hbsag_status", "N/A")
    # `or` makes an explicit None behave like a missing key instead of
    # raising TypeError in the numeric comparisons / membership test below.
    hbv_dna_numeric = patient_data.get("hbv_dna_level_numeric") or 0.0
    hbeag_status = patient_data.get("hbeag_status", "N/A")
    alt_level = patient_data.get("alt_level") or 0
    fibrosis_stage = patient_data.get("fibrosis_stage", "F0")
    necroinflammatory = patient_data.get("necroinflammatory_activity", "A0")
    age = patient_data.get("age") or 0
    extrahepatic = patient_data.get("extrahepatic_manifestations", False)
    immunosuppression = patient_data.get("immunosuppression_status", "None")
    coinfections = patient_data.get("coinfections") or []
    family_history = patient_data.get("family_history_cirrhosis_hcc", False)
    pregnancy_status = patient_data.get("pregnancy_status", "No")

    ULN = 40  # Based on KEY DEFINITIONS

    # 0. Basic Requirement Check
    if hbsag_status != "Positive":
        return False, "Not eligible: HBsAg negative. HBsAg positivity is required for HBV treatment consideration [SASLT 2021, Page 6]."

    # --- 1. Special Populations (CRITICAL ELIGIBILITY HIERARCHY: FIRST) ---

    # 1.1 Immunosuppression (Page 9)
    # None / empty / any casing of "None" all mean "no immunosuppression";
    # a bare `!= "None"` would wrongly flag a null field as immunosuppressed.
    is_immunosuppressed = bool(immunosuppression) and str(immunosuppression).strip().lower() != "none"
    if is_immunosuppressed:
        return True, "Eligible for prophylaxis: HBsAg-positive patient receiving immunosuppressive therapy/chemotherapy (Grade A) [SASLT 2021, Page 9]."

    # 1.2 HBV-HIV Coinfection (Page 9)
    if "HIV" in coinfections:
        return True, "Eligible for ART: HBV-HIV coinfection requires TDF- or TAF-based ART regimen (Grade A) [SASLT 2021, Page 9]."

    # 1.3 Pregnancy (Page 10)
    if pregnancy_status == "Yes" and hbv_dna_numeric > 100000:
        return True, "Eligible for prophylaxis: Pregnant woman with HBV DNA > 100,000 IU/mL started at 24-28 weeks (Grade D) [SASLT 2021, Page 10]."

    # --- 2. Standard Criteria (CRITICAL ELIGIBILITY HIERARCHY: SECOND) ---

    # Helper checks
    is_cirrhotic = fibrosis_stage == "F4"
    has_moderate_fibrosis = fibrosis_stage in ("F2", "F3", "F4")
    has_moderate_fibrosis_or_necroinflammation = has_moderate_fibrosis or necroinflammatory in ("A2", "A3")
    is_alt_elevated = alt_level > ULN
    is_alt_doubly_elevated = alt_level > (2 * ULN)
    is_viremic = hbv_dna_numeric > 2000

    # 2.1 Cirrhosis (F4) (Grade A)
    # NOTE(review): the guideline text says "any detectable HBV DNA level",
    # but this rule does not look at HBV DNA at all (an undetectable or
    # missing value still qualifies). Confirm this is the intended reading.
    if is_cirrhotic:
        return True, "Eligible: Patient has cirrhosis (F4) regardless of ALT and HBV DNA level (Grade A) [SASLT 2021, Page 6]."

    # 2.2 Severe Viremia/ALT (HBV DNA > 20,000 & ALT > 2xULN) (Grade B)
    if hbv_dna_numeric > 20000 and is_alt_doubly_elevated:
        return True, "Eligible: HBV DNA > 20,000 IU/mL and ALT > 2xULN (Grade B) [SASLT 2021, Page 6]."

    # 2.3 Active CHB (HBV DNA > 2,000 & ALT > ULN & F2/A2+) (Grade A) - Main Criterion
    if is_viremic and is_alt_elevated and has_moderate_fibrosis_or_necroinflammation:
        return True, "Eligible: HBV DNA > 2,000 IU/mL, ALT > ULN, and at least moderate fibrosis/necroinflammation (Grade A) [SASLT 2021, Page 6]."

    # 2.4 HBeAg+ > 30 years (HBeAg+, Normal ALT, High DNA) (Grade D)
    if hbeag_status == "Positive" and age > 30 and not is_alt_elevated and is_viremic:
        return True, "Eligible: HBeAg-positive chronic infection in patient > 30 years despite normal ALT (Grade D) [SASLT 2021, Page 6]."

    # 2.5 Viremia and Moderate Fibrosis (HBV DNA > 2,000 & Moderate Fibrosis) (Implicit from Page 6)
    # "Patients with HBV DNA >2,000 IU/mL and at least moderate fibrosis may initiate treatment even if ALT levels are normal."
    if is_viremic and has_moderate_fibrosis:
        return True, "Eligible: HBV DNA > 2,000 IU/mL and at least moderate fibrosis (F2+) (Grade D/Implicit) [SASLT 2021, Page 6]."

    # 2.6 Family History/Extrahepatic (HBV DNA > 2,000 & ALT > ULN & History/Extrahepatic) (Grade D)
    if is_viremic and is_alt_elevated and (family_history or extrahepatic):
        return True, "Eligible: HBV DNA > 2,000 IU/mL, ALT > ULN, and family history of HCC/cirrhosis or extrahepatic manifestations (Grade D) [SASLT 2021, Page 6]."

    # --- 3. Not Eligible ---
    # Determine reason for non-eligibility for monitoring purposes
    if hbeag_status == "Negative" and hbv_dna_numeric < 2000:
        if is_alt_elevated:
            # ALT is not part of this branch's condition, so the rationale
            # must not claim "ALT ≤ ULN" when ALT is actually elevated.
            rationale = "Not eligible: HBV DNA < 2,000 IU/mL despite ALT > ULN; monitoring indicated [SASLT 2021, Page 6]."
        else:
            rationale = "Not eligible: HBV DNA < 2,000 IU/mL and ALT ≤ ULN; monitoring indicated [SASLT 2021, Page 6]."
    elif not is_alt_elevated and not has_moderate_fibrosis_or_necroinflammation:
        rationale = "Not eligible: ALT ≤ ULN and lack of significant fibrosis/necroinflammation; monitoring indicated [SASLT 2021, Page 6]."
    else:
        rationale = "Not eligible: Criteria not fully met (e.g., HBV DNA < 2,000 IU/mL) [SASLT 2021, Page 6]."

    return False, rationale
def _parse_hbv_dna_level(raw_level: Any) -> float:
    """Convert a raw HBV DNA level (number or free text) to a float; 0.0 if unparseable."""
    if isinstance(raw_level, str):
        # Scientific notation ("1.5e5", "3.2E+06 IU/mL") has to be read as one
        # token; stripping non-digit characters would turn "1.5e5" into 1.55.
        scientific = re.search(r"\d+(?:\.\d+)?[eE][+-]?\d+", raw_level)
        if scientific:
            return float(scientific.group(0))
        # Otherwise keep only digits and dots ("2,000 IU/mL" -> "2000").
        digits_only = re.sub(r"[^\d\.]", "", raw_level)
        try:
            return float(digits_only) if digits_only else 0.0
        except ValueError:
            return 0.0
    try:
        return float(raw_level)
    except (TypeError, ValueError):
        return 0.0


def _extract_json_payload(response_text: str) -> str:
    """Isolate the JSON object embedded in an LLM reply; raise ValueError if none is found."""
    text = response_text

    # Remove a markdown code fence if present ("```json" takes precedence).
    for fence in ("```json", "```"):
        fence_at = text.find(fence)
        if fence_at != -1:
            body_start = fence_at + len(fence)
            body_end = text.find("```", body_start)
            # With no closing fence, keep everything after the opener; slicing
            # with -1 would silently drop the last character (often the "}").
            text = (text[body_start:] if body_end == -1 else text[body_start:body_end]).strip()
            break

    # Find JSON in response (handle cases where LLM adds extra text)
    obj_start = text.find("{")
    obj_end = text.rfind("}") + 1
    if obj_start < 0 or obj_end <= obj_start:
        raise ValueError("No JSON found in response")

    # Zero-width characters and the BOM are dropped. NO-BREAK SPACE becomes a
    # plain space so words inside string values are not glued together.
    invisible_map = {
        0x200B: None,  # ZERO WIDTH SPACE
        0x200C: None,  # ZERO WIDTH NON-JOINER
        0x200D: None,  # ZERO WIDTH JOINER
        0xFEFF: None,  # BYTE ORDER MARK
        0x00A0: " ",   # NO-BREAK SPACE
    }
    return text[obj_start:obj_end].translate(invisible_map)


def assess_hbv_eligibility(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Assess patient eligibility for HBV treatment based on SASLT 2021 guidelines
    using hardcoded guideline pages (3, 4, 6, 7, 8, 9, 10) and LLM analysis.

    Eligibility itself is decided by ``check_eligibility_criteria``; the LLM is
    only asked to write the narrative. If the LLM returns a different
    ``eligible`` value it is overridden, and if its reply cannot be parsed the
    deterministic eligibility and rationale are still returned.

    Note: ``patient_data`` is modified in place. The keys
    ``hbv_dna_level_numeric`` and ``hbv_dna_2000_comparison`` are added.

    Args:
        patient_data: Dictionary containing patient clinical parameters

    Returns:
        Dictionary with assessment results:
        - eligible: bool
        - recommendations: str (comprehensive narrative with inline citations in format [SASLT 2021, Page X])

    Raises:
        Exception: Any error other than a JSON parsing failure (for example
            from obtaining or invoking the LLM) is logged and re-raised.
    """
    try:
        # Use hardcoded SASLT 2021 guidelines instead of RAG retrieval
        logger.info("Using hardcoded SASLT 2021 guidelines (Pages 3, 4, 6, 7, 8, 9, 10)")

        # --- Pre-processing for numeric comparison ---
        hbv_dna = patient_data.get("hbv_dna_level", 0)
        hbv_dna_numeric = _parse_hbv_dna_level(hbv_dna)
        patient_data["hbv_dna_level_numeric"] = hbv_dna_numeric  # Add numeric value to patient data

        if hbv_dna_numeric > 2000:
            hbv_dna_2000_comparison = ">"
        elif hbv_dna_numeric < 2000:
            hbv_dna_2000_comparison = "<"
        else:
            hbv_dna_2000_comparison = "="
        patient_data["hbv_dna_2000_comparison"] = hbv_dna_2000_comparison
        logger.info(f"HBV DNA vs 2000 IU/mL comparison: {hbv_dna_numeric} {hbv_dna_2000_comparison} 2000")

        # --- Deterministic Eligibility Check ---
        is_eligible, rationale_message = check_eligibility_criteria(patient_data)

        # --- Prepare LLM Prompt with Deterministic Result ---
        sex = patient_data.get("sex", "Male")
        age = patient_data.get("age", "N/A")
        pregnancy_status = patient_data.get("pregnancy_status", "N/A")
        hbsag_status = patient_data.get("hbsag_status", "N/A")
        duration_hbsag = patient_data.get("duration_hbsag_months", "N/A")
        hbeag_status = patient_data.get("hbeag_status", "N/A")
        alt_level = patient_data.get("alt_level", 0)
        fibrosis_stage = patient_data.get("fibrosis_stage", "N/A")
        necroinflammatory = patient_data.get("necroinflammatory_activity", "N/A")
        extrahepatic = patient_data.get("extrahepatic_manifestations", False)
        immunosuppression = patient_data.get("immunosuppression_status", "None")
        coinfections = patient_data.get("coinfections", [])
        family_history = patient_data.get("family_history_cirrhosis_hcc", False)
        comorbidities = patient_data.get("other_comorbidities", [])

        # Crucial addition: Pass the determined status and rationale to the LLM
        eligible_literal = "true" if is_eligible else "false"  # JSON boolean literal
        deterministic_status = f"Eligible: {eligible_literal}"
        deterministic_rationale = rationale_message

        analysis_prompt = f"""You are an HBV treatment eligibility assessment system. Analyze the patient data against the SASLT 2021 guidelines.
PATIENT DATA:
- Sex: {sex}
- Age: {age} years
- Pregnancy Status: {pregnancy_status}
- HBsAg Status: {hbsag_status}
- HBsAg Duration: {duration_hbsag} months
- HBV DNA Level: {hbv_dna} IU/mL
- HBV DNA vs 2000 IU/mL: {hbv_dna_2000_comparison}
- HBeAg Status: {hbeag_status}
- ALT Level: {alt_level}
- Fibrosis Stage: {fibrosis_stage}
- Necroinflammatory Activity: {necroinflammatory}
- Extrahepatic Manifestations: {extrahepatic}
- Immunosuppression: {immunosuppression}
- Coinfections: {', '.join(coinfections) if coinfections else 'None'}
- Family History (Cirrhosis/HCC): {family_history}
- Other Comorbidities: {', '.join(comorbidities) if comorbidities else 'None'}
DETERMINISTIC ELIGIBILITY RESULT (LLM MUST USE THIS EXACT STATUS):
{deterministic_status}
Primary Rationale: {deterministic_rationale}
NUMERIC RULES (MUST FOLLOW EXACTLY):
- Compare integers exactly as written (e.g., 1800 < 2000 is TRUE; 1800 > 2000 is FALSE).
- Respect inequality signs from the guidelines (>, >=, <, <=) without relaxing or tightening thresholds.
SASLT 2021 GUIDELINES (Retrieved Context):
{SASLT_GUIDELINES}
Based STRICTLY on the SASLT 2021 guidelines and the DETERMINISTIC ELIGIBILITY RESULT, complete the JSON response.
You MUST respond with a valid JSON object in this exact format:
{{
  "eligible": {eligible_literal},
  "recommendations": "Comprehensive assessment with inline citations"
}}
IMPORTANT JSON FORMATTING:
- Return ONLY valid JSON without markdown code blocks.
- You MUST use "\\n" to indicate line breaks inside the "recommendations" string and format the content as clear bullet lists prefixed with "- ".
- Do NOT include literal newline characters. Use \\n for every new bullet or line.
- Use SINGLE \\n between lines. Do NOT use \\n\\n (double newlines) anywhere.
CRITICAL INSTRUCTION:
The value of the "eligible" field MUST match the DETERMINISTIC ELIGIBILITY RESULT above.
STRUCTURE AND CONTENT OF "recommendations" (CONCISE & ORGANIZED):
- Use ONLY these sections in this exact order, each as a header followed by 1-3 concise bullets:
  1. "Eligibility and Rationale:" (1-2 bullets max, **The first bullet MUST incorporate the Primary Rationale provided above.**)
  2. "Treatment Recommendations:" (1-3 bullets: first-line drugs if eligible, or "Treatment not indicated" if not eligible)
  3. "Monitoring and Follow-up:" (1-2 bullets)
  4. "Special Considerations:" (0-2 bullets, ONLY if applicable and *not* the primary reason for eligibility)
  5. "References:" (1 line listing pages cited)
- OMIT "Additional Notes" and any other sections.
- Keep each bullet to ONE sentence (max 25 words per bullet).
- Total output: aim for 8-12 bullets maximum across all sections.
BULLETING AND CITATIONS RULES:
- Put citations at the end of each bullet using "[SASLT 2021, Page X]".
- Include evidence grade when available (e.g., "(Grade A)").
- Only cite pages 6–10 that actually contain the information.
STRICT ACCURACY AND CONSISTENCY RULES:
- **NO CONTRADICTIONS:** The "eligible" flag MUST match the rationale derived in the code.
- **Use ONLY the provided SASLT 2021 content;** do NOT add external knowledge.
- **BREVITY:** Each bullet = 1 sentence, max 25 words. Total = 8-12 bullets max.
PAGE-TO-TOPIC MAPPING GUIDANCE (for correct citations):
- Page 6: Standard initiation of treatment criteria, management algorithm.
- Page 7: Monitoring of untreated patients, CHB treatment principles.
- Page 8: Treatment drugs/regimens (ETV, TDF, TAF), agents not recommended.
- Page 9: Special populations (HBV-HCV, HBV-HDV, HBV-HIV, Immunocompromised).
- Page 10: Pregnancy-related recommendations.
"""
        banner = '=' * 80

        # Log the complete prompt being sent to LLM
        # NOTE(review): this writes patient clinical data to the INFO log;
        # confirm that is acceptable for the deployment's logging policy.
        logger.info(f"\n{banner}")
        logger.info("LLM PROMPT")
        logger.info(f"{banner}")
        logger.info(f"\n{analysis_prompt}\n")
        logger.info(f"{banner}\n")

        # Get LLM response
        llm = get_llm()
        logger.info("Sending prompt to LLM...")
        response = llm.invoke(analysis_prompt)
        logger.info("LLM response received")

        # Extract the text payload; coerce to str so the parsing below always
        # works on a string, then trim surrounding whitespace.
        response_text = response.content if hasattr(response, 'content') else str(response)
        if not isinstance(response_text, str):
            response_text = str(response_text)
        response_text = response_text.strip()

        # Log LLM response
        logger.info(f"\n{banner}")
        logger.info("LLM RESPONSE")
        logger.info(f"{banner}")
        logger.info(f"\n{response_text}\n")
        logger.info(f"{banner}\n")

        # Try to parse JSON from response
        try:
            json_str = _extract_json_payload(response_text)

            # Clean the JSON string to escape control characters within string values
            cleaned_json_str = clean_json_string(json_str)
            logger.debug(f"Cleaned JSON string (first 500 chars): {cleaned_json_str[:500]}")

            # Parse the cleaned JSON
            result = json.loads(cleaned_json_str)
            logger.info("✅ Successfully parsed JSON response")

            # Ensure the LLM didn't override the deterministic eligibility flag
            if result.get("eligible") != is_eligible:
                logger.warning(f"LLM contradicted deterministic eligibility. Expected: {is_eligible}, Got: {result.get('eligible')}. Overriding.")
                result["eligible"] = is_eligible  # Force correct eligibility status

            # The model occasionally returns the bullets as a JSON array or
            # null; turn that into text instead of failing in the normalizer.
            raw_recs = result.get("recommendations", "")
            if isinstance(raw_recs, (list, tuple)):
                raw_recs = "\n".join(str(item) for item in raw_recs)
            elif raw_recs is None:
                raw_recs = ""
            elif not isinstance(raw_recs, str):
                raw_recs = str(raw_recs)
            normalized_recs = normalize_recommendations(raw_recs)

            assessment_result = {
                "eligible": result.get("eligible", is_eligible),
                "recommendations": normalized_recs
            }

            # Log final assessment
            logger.info(f"\n{banner}")
            logger.info("FINAL ASSESSMENT")
            logger.info(f"{banner}")
            logger.info(f"Eligible: {assessment_result['eligible']}")
            logger.info(f"Recommendations length: {len(assessment_result['recommendations'])} characters")
            logger.info(f"{banner}\n")
            return assessment_result
        except (json.JSONDecodeError, ValueError) as e:
            logger.error(f"Failed to parse LLM response as JSON: {e}")
            logger.error(f"Response text: {response_text}")
            # Fallback: the narrative is unavailable, but eligibility was
            # decided deterministically before the LLM call, so report that
            # result rather than a hard-coded False.
            return {
                "eligible": is_eligible,
                "recommendations": (
                    f"Error parsing assessment results. Please try again. Error details: {str(e)}"
                    f"\nDeterministic rationale: {rationale_message}"
                )
            }
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception(f"Error in assess_hbv_eligibility: {str(e)}")
        raise