Spaces:
Running
Running
Update the Assessment Results
Browse files
- HBV_Eligibility_Results.csv +436 -0
- INTEGRATION_SUMMARY.md +268 -0
- LLAMAPARSE_INTEGRATION.md +265 -0
- core/config.py +4 -0
- core/data_loaders.py +213 -41
- core/hbv_assessment.py +102 -97
- core/utils.py +10 -2
- data/HBV_Eligibility_TestCases - To Be Tested(Sheet1).csv +31 -0
- test_assessment_fixed.py +238 -0
HBV_Eligibility_Results.csv
ADDED
|
@@ -0,0 +1,436 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Case ID,Age,Sex,Pregnancy Status,HBsAg,HBeAg,HBV DNA (IU/mL),ALT (U/L),Fibrosis/Cirrhosis Stage,Necroinflammation,Extrahepatic Manifestations,Immunosuppressive Therapy,Coinfections,Family History of HCC/Cirrhosis,Smoking,Comorbidities,Eligibility,Rationale
|
| 2 |
+
Case1,28,M,No,Positive (36 months),Positive,8500000,120,F1,Moderate,No,,,No,No,,True,"Eligibility and Rationale:
|
| 3 |
+
- Eligible: HBV DNA > 2,000 IU/mL and ALT > ULN, with moderate necroinflammation (Grade A) [SASLT 2021, Page 6]
|
| 4 |
+
- Meets criteria for treatment due to HBV DNA > 20,000 IU/mL and ALT > 2xULN (Grade B) [SASLT 2021, Page 6]
|
| 5 |
+
|
| 6 |
+
Treatment Recommendations:
|
| 7 |
+
- Start monotherapy with ETV, TDF, or TAF (Grade A) [SASLT 2021, Page 8]
|
| 8 |
+
|
| 9 |
+
Monitoring and Follow-up:
|
| 10 |
+
- Monitor treatment response per SASLT protocol every 3-6 months (Grade B) [SASLT 2021, Page 7]
|
| 11 |
+
|
| 12 |
+
Special Considerations:
|
| 13 |
+
- None applicable as patient has no coinfections, immunosuppression, or extrahepatic manifestations [SASLT 2021, Pages 6-9]
|
| 14 |
+
|
| 15 |
+
References:
|
| 16 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 17 |
+
Case2,45,F,No,Positive (60 months),Negative,15000,65,F1–F2,Moderate,No,,,No,No,Metabolic syndrome,True,"Eligibility and Rationale:
|
| 18 |
+
- Eligible: HBV DNA > 2,000 IU/mL, ALT > ULN, moderate fibrosis (Grade A) [SASLT 2021, Page 6]
|
| 19 |
+
- Necroinflammatory activity (A2) supports treatment initiation (Grade A) [SASLT 2021, Page 6]
|
| 20 |
+
|
| 21 |
+
Treatment Recommendations:
|
| 22 |
+
- Start monotherapy with ETV, TDF, or TAF (Grade A) [SASLT 2021, Page 8]
|
| 23 |
+
|
| 24 |
+
Monitoring and Follow-up:
|
| 25 |
+
- Monitor treatment response per SASLT protocol every 3-6 months (Grade D) [SASLT 2021, Page 7]
|
| 26 |
+
|
| 27 |
+
Special Considerations:
|
| 28 |
+
- Metabolic syndrome may require additional management alongside HBV treatment [SASLT 2021, Page 6]
|
| 29 |
+
|
| 30 |
+
References:
|
| 31 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 32 |
+
Case3,52,M,No,Positive (120 months),Negative,3500,32,F2–F3,Mild,No,,,No,No,,True,"Eligibility and Rationale:
|
| 33 |
+
- Eligible: HBV DNA > 2,000 IU/mL, moderate fibrosis (F2-F3), and chronic HBV infection (Grade A) [SASLT 2021, Page 6].
|
| 34 |
+
- ALT is within normal limits but fibrosis stage supports treatment initiation (Grade A) [SASLT 2021, Page 6].
|
| 35 |
+
|
| 36 |
+
Treatment Recommendations:
|
| 37 |
+
- Start monotherapy with ETV, TDF, or TAF as first-line treatment (Grade A) [SASLT 2021, Page 8].
|
| 38 |
+
|
| 39 |
+
Monitoring and Follow-up:
|
| 40 |
+
- Monitor treatment response per SASLT protocol, including HBV DNA and ALT levels every 3-6 months (Grade A) [SASLT 2021, Page 7].
|
| 41 |
+
|
| 42 |
+
Special Considerations:
|
| 43 |
+
- None applicable as patient has no coinfections, immunosuppression, or extrahepatic manifestations [SASLT 2021, Pages 6, 9].
|
| 44 |
+
|
| 45 |
+
References:
|
| 46 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 47 |
+
Case4,60,M,No,Positive (240 months),Negative,700,28,F4 (compensated),Mild,No,,,No,No,,True,"Eligibility and Rationale:
|
| 48 |
+
- Eligible: Cirrhosis (F4) with detectable HBV DNA, regardless of ALT levels (Grade A) [SASLT 2021, Page 6]
|
| 49 |
+
|
| 50 |
+
Treatment Recommendations:
|
| 51 |
+
- Start monotherapy with ETV, TDF, or TAF (Grade A) [SASLT 2021, Page 8]
|
| 52 |
+
|
| 53 |
+
Monitoring and Follow-up:
|
| 54 |
+
- Monitor treatment response per SASLT protocol every 3-6 months (Grade A) [SASLT 2021, Page 7]
|
| 55 |
+
|
| 56 |
+
Special Considerations:
|
| 57 |
+
- None applicable to this patient [SASLT 2021, Pages 6-10]
|
| 58 |
+
|
| 59 |
+
References:
|
| 60 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 61 |
+
Case5,22,F,No,Positive (18 months),Positive,25000000,27,F0,,No,,,No,No,,False,"Eligibility and Rationale:
|
| 62 |
+
- Not eligible: HBV DNA > 20,000 IU/mL but ALT ≤ 2xULN and age < 30 years (Grade B) [SASLT 2021, Page 6].
|
| 63 |
+
- No significant fibrosis or necroinflammation (F0-F1, A0) and no family history of HCC or cirrhosis [SASLT 2021, Page 6].
|
| 64 |
+
|
| 65 |
+
Treatment Recommendations:
|
| 66 |
+
- Treatment not indicated at this time [SASLT 2021, Page 6].
|
| 67 |
+
|
| 68 |
+
Monitoring and Follow-up:
|
| 69 |
+
- Monitor every 3-6 months for HBeAg-positive patients under 30 years (Grade B) [SASLT 2021, Page 7].
|
| 70 |
+
|
| 71 |
+
Special Considerations:
|
| 72 |
+
- None applicable to this patient.
|
| 73 |
+
|
| 74 |
+
References:
|
| 75 |
+
- Pages 6, 7: Treatment criteria, monitoring protocols"
|
| 76 |
+
Case6,40,M,No,Positive (84 months),Negative,900,22,F0–F1,,No,,,No,No,,False,"Eligibility and Rationale:
|
| 77 |
+
- Not eligible: HBV DNA < 2,000 IU/mL, ALT ≤ ULN, no significant fibrosis or necroinflammation [SASLT 2021, Page 6].
|
| 78 |
+
|
| 79 |
+
Treatment Recommendations:
|
| 80 |
+
- Treatment not indicated at this time [SASLT 2021, Page 6].
|
| 81 |
+
|
| 82 |
+
Monitoring and Follow-up:
|
| 83 |
+
- Monitor every 6-12 months (HBeAg-negative, HBV DNA < 2,000 IU/mL) (Grade B) [SASLT 2021, Page 7].
|
| 84 |
+
|
| 85 |
+
References:
|
| 86 |
+
- Pages 6, 7: Treatment criteria, monitoring protocols"
|
| 87 |
+
Case7,31,F,Yes (28 weeks),Positive (48 months),Positive,500000,35,F1,Mild,No,,,No,No,,True,"Eligibility and Rationale:
|
| 88 |
+
- Eligible: HBV DNA > 100,000 IU/mL during pregnancy warrants antiviral prophylaxis starting at 24-28 weeks (Grade D) [SASLT 2021, Page 10].
|
| 89 |
+
- Eligible: HBeAg-positive chronic HBV infection with high HBV DNA levels (> 20,000 IU/mL) and age > 30 years (Grade D) [SASLT 2021, Page 6].
|
| 90 |
+
|
| 91 |
+
Treatment Recommendations:
|
| 92 |
+
- Start antiviral prophylaxis with TDF or TAF at 24-28 weeks of pregnancy (Grade D) [SASLT 2021, Page 10].
|
| 93 |
+
- Preferred regimens for CHB treatment include TDF, TAF, or ETV monotherapy (Grade A) [SASLT 2021, Page 8].
|
| 94 |
+
|
| 95 |
+
Monitoring and Follow-up:
|
| 96 |
+
- Monitor HBV DNA and ALT levels every 3 months during pregnancy and postpartum (Grade D) [SASLT 2021, Page 7].
|
| 97 |
+
- Assess treatment response and adjust therapy as needed per SASLT protocol [SASLT 2021, Page 7].
|
| 98 |
+
|
| 99 |
+
Special Considerations:
|
| 100 |
+
- Breastfeeding is permitted for HBsAg-positive women on TDF (Grade B) [SASLT 2021, Page 10].
|
| 101 |
+
|
| 102 |
+
References:
|
| 103 |
+
- Pages 6, 7, 8, 10: Treatment criteria, drugs, monitoring, pregnancy-related recommendations"
|
| 104 |
+
Case8,35,M,No,Positive (72 months),Positive,9000000,33,F1,Mild,No,,,No,No,,True,"Eligibility and Rationale:
|
| 105 |
+
- Eligible: HBeAg-positive chronic HBV infection, age > 30 years, persistently high HBV DNA levels (Grade D) [SASLT 2021, Page 6].
|
| 106 |
+
- HBV DNA > 2,000 IU/mL and ALT ≤ ULN, meeting treatment criteria for HBeAg-positive patients > 30 years (Grade D) [SASLT 2021, Page 6].
|
| 107 |
+
|
| 108 |
+
Treatment Recommendations:
|
| 109 |
+
- Start monotherapy with ETV, TDF, or TAF (Grade A) [SASLT 2021, Page 8].
|
| 110 |
+
|
| 111 |
+
Monitoring and Follow-up:
|
| 112 |
+
- Monitor treatment response per SASLT protocol every 3-6 months (Grade B) [SASLT 2021, Page 7].
|
| 113 |
+
|
| 114 |
+
Special Considerations:
|
| 115 |
+
- None applicable to this patient.
|
| 116 |
+
|
| 117 |
+
References:
|
| 118 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 119 |
+
Case9,55,F,No,Positive (180 months),Negative,6200,45,F1–F2,Moderate,No,,,Yes (father HCC),No,,True,"Eligibility and Rationale:
|
| 120 |
+
- Eligible: HBV DNA > 2,000 IU/mL, ALT > ULN, moderate fibrosis (Grade A) [SASLT 2021, Page 6]
|
| 121 |
+
- Family history of HCC or cirrhosis supports treatment initiation (Grade D) [SASLT 2021, Page 6]
|
| 122 |
+
|
| 123 |
+
Treatment Recommendations:
|
| 124 |
+
- Start monotherapy with ETV, TDF, or TAF (Grade A) [SASLT 2021, Page 8]
|
| 125 |
+
|
| 126 |
+
Monitoring and Follow-up:
|
| 127 |
+
- Monitor treatment response per SASLT protocol every 3-6 months [SASLT 2021, Page 7]
|
| 128 |
+
|
| 129 |
+
Special Considerations:
|
| 130 |
+
- None applicable to this patient [SASLT 2021, Pages 6-10]
|
| 131 |
+
|
| 132 |
+
References:
|
| 133 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 134 |
+
Case10,63,M,No,Positive (300 months),Negative,50,32,F4 (decompensated),Mild,No,,,No,No,,True,"Eligibility and Rationale:
|
| 135 |
+
- Eligible: Patient has cirrhosis (F4 fibrosis) with detectable HBV DNA, regardless of ALT levels (Grade A) [SASLT 2021, Page 6].
|
| 136 |
+
|
| 137 |
+
Treatment Recommendations:
|
| 138 |
+
- Start monotherapy with ETV, TDF, or TAF as first-line treatment (Grade A) [SASLT 2021, Page 8].
|
| 139 |
+
|
| 140 |
+
Monitoring and Follow-up:
|
| 141 |
+
- Monitor treatment response per SASLT protocol, including HBV DNA and ALT levels every 3-6 months [SASLT 2021, Page 7].
|
| 142 |
+
|
| 143 |
+
Special Considerations:
|
| 144 |
+
- None applicable as patient has no coinfections, immunosuppression, or extrahepatic manifestations [SASLT 2021, Pages 6, 9].
|
| 145 |
+
|
| 146 |
+
References:
|
| 147 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 148 |
+
Case11,68,M,No,Positive (25 years),Negative,5800,41,F2,Mild,No,,,No,No,Diabetes,True,"Eligibility and Rationale:
|
| 149 |
+
- Eligible: HBV DNA > 2,000 IU/mL, ALT > ULN, and moderate fibrosis (F2-F3) (Grade A) [SASLT 2021, Page 6]
|
| 150 |
+
- Chronic HBV infection with significant liver disease warrants treatment initiation (Grade A) [SASLT 2021, Page 6]
|
| 151 |
+
|
| 152 |
+
Treatment Recommendations:
|
| 153 |
+
- Start monotherapy with ETV, TDF, or TAF as first-line treatment (Grade A) [SASLT 2021, Page 8]
|
| 154 |
+
|
| 155 |
+
Monitoring and Follow-up:
|
| 156 |
+
- Monitor treatment response per SASLT protocol, including HBV DNA and ALT levels every 3-6 months (Grade A) [SASLT 2021, Page 7]
|
| 157 |
+
|
| 158 |
+
Special Considerations:
|
| 159 |
+
- Patient's age (68 years) and comorbidity (diabetes) may require closer monitoring for treatment-related side effects [SASLT 2021, Page 6]
|
| 160 |
+
|
| 161 |
+
References:
|
| 162 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 163 |
+
Case12,50,F,No,Positive,Negative,450,32,,Mild,No,Yes (tacrolimus),,No,No,Post-transplant,False,"Eligibility and Rationale:
|
| 164 |
+
- Not eligible: HBV DNA < 2,000 IU/mL, ALT ≤ ULN, no significant fibrosis or necroinflammation [SASLT 2021, Page 6].
|
| 165 |
+
|
| 166 |
+
Treatment Recommendations:
|
| 167 |
+
- Treatment not indicated at this time [SASLT 2021, Page 6].
|
| 168 |
+
|
| 169 |
+
Monitoring and Follow-up:
|
| 170 |
+
- Monitor every 6-12 months (HBeAg-negative, HBV DNA < 2,000 IU/mL) (Grade B) [SASLT 2021, Page 7].
|
| 171 |
+
|
| 172 |
+
Special Considerations:
|
| 173 |
+
- Immunosuppression: Consider prophylaxis if undergoing immunosuppressive therapy (Grade A) [SASLT 2021, Page 9].
|
| 174 |
+
|
| 175 |
+
References:
|
| 176 |
+
- Pages 6, 7, 9: Treatment criteria, monitoring protocols, immunosuppression considerations"
|
| 177 |
+
Case13,27,M,No,Positive (36 months),Negative,1700,55,F1,Mild,No,,,No,No,,False,"Eligibility and Rationale:
|
| 178 |
+
- Not eligible: HBV DNA < 2,000 IU/mL, ALT > ULN but mild fibrosis and necroinflammation (Grade A) [SASLT 2021, Page 6]
|
| 179 |
+
|
| 180 |
+
Treatment Recommendations:
|
| 181 |
+
- Treatment not indicated at this time [SASLT 2021, Page 6]
|
| 182 |
+
|
| 183 |
+
Monitoring and Follow-up:
|
| 184 |
+
- Monitor every 6-12 months (HBeAg-negative, HBV DNA < 2,000 IU/mL) (Grade B) [SASLT 2021, Page 7]
|
| 185 |
+
|
| 186 |
+
Special Considerations:
|
| 187 |
+
- None applicable to this patient [SASLT 2021, Pages 6-10]
|
| 188 |
+
|
| 189 |
+
References:
|
| 190 |
+
- Pages 6, 7: Treatment criteria, monitoring protocols"
|
| 191 |
+
Case14,44,F,No,Positive,Negative,3000,70,F2,Moderate,No,,HCV RNA positive,No,No,,True,"Eligibility and Rationale:
|
| 192 |
+
- Eligible: HBV DNA > 2,000 IU/mL, ALT > ULN, and moderate fibrosis (Grade A) [SASLT 2021, Page 6].
|
| 193 |
+
- HBV-HCV coinfection requires concurrent or prior HBV treatment to prevent reactivation during DAA therapy (Grade A) [SASLT 2021, Page 9].
|
| 194 |
+
|
| 195 |
+
Treatment Recommendations:
|
| 196 |
+
- Start monotherapy with ETV, TDF, or TAF (Grade A) [SASLT 2021, Page 8].
|
| 197 |
+
|
| 198 |
+
Monitoring and Follow-up:
|
| 199 |
+
- Monitor HBV DNA and ALT every 4-8 weeks during DAA therapy and three months post-therapy (Grade D) [SASLT 2021, Page 9].
|
| 200 |
+
- Monitor treatment response per SASLT protocol (Grade A) [SASLT 2021, Page 7].
|
| 201 |
+
|
| 202 |
+
Special Considerations:
|
| 203 |
+
- HBV-HCV coinfection requires close monitoring to prevent HBV reactivation during HCV treatment (Grade A) [SASLT 2021, Page 9].
|
| 204 |
+
|
| 205 |
+
References:
|
| 206 |
+
- Pages 6, 7, 8, 9: Treatment criteria, drugs, monitoring, HBV-HCV coinfection"
|
| 207 |
+
Case15,33,M,No,Positive,Negative,25000,30,F1,Mild,No,,HIV positive,No,No,,False,"Eligibility and Rationale:
|
| 208 |
+
- Not eligible: HBV DNA > 2,000 IU/mL but ALT ≤ ULN and no significant fibrosis or necroinflammation (Grade A) [SASLT 2021, Page 6].
|
| 209 |
+
|
| 210 |
+
Treatment Recommendations:
|
| 211 |
+
- Treatment not indicated at this time [SASLT 2021, Page 6].
|
| 212 |
+
|
| 213 |
+
Monitoring and Follow-up:
|
| 214 |
+
- Monitor every 3 months for the first year, then every 6 months (HBeAg-negative, HBV DNA ≥ 2,000 IU/mL) (Grade D) [SASLT 2021, Page 7].
|
| 215 |
+
|
| 216 |
+
Special Considerations:
|
| 217 |
+
- HBV-HIV coinfection: ensure ART includes TDF- or TAF-based regimen (Grade A) [SASLT 2021, Page 9].
|
| 218 |
+
|
| 219 |
+
References:
|
| 220 |
+
- Pages 6, 7, 9: Treatment criteria, monitoring protocols, HIV coinfection"
|
| 221 |
+
Case16,30,F,Yes (30 weeks),Positive,Positive,40000,28,F0,,No,,,No,No,,False,"Eligibility and Rationale:
|
| 222 |
+
- Not eligible: HBV DNA > 2,000 IU/mL but ALT ≤ ULN and no significant fibrosis or necroinflammation [SASLT 2021, Page 6].
|
| 223 |
+
- Pregnancy alone does not meet treatment criteria unless HBV DNA > 100,000 IU/mL at 24-28 weeks [SASLT 2021, Page 10].
|
| 224 |
+
|
| 225 |
+
Treatment Recommendations:
|
| 226 |
+
- Treatment not indicated at this time [SASLT 2021, Page 6].
|
| 227 |
+
|
| 228 |
+
Monitoring and Follow-up:
|
| 229 |
+
- Monitor every 3-6 months for HBeAg-positive patients under 30 years (Grade B) [SASLT 2021, Page 7].
|
| 230 |
+
- Assess HBV DNA and ALT levels regularly to evaluate disease progression [SASLT 2021, Page 6].
|
| 231 |
+
|
| 232 |
+
Special Considerations:
|
| 233 |
+
- Consider antiviral prophylaxis with TDF if HBV DNA exceeds 100,000 IU/mL at 24-28 weeks of pregnancy [SASLT 2021, Page 10].
|
| 234 |
+
|
| 235 |
+
References:
|
| 236 |
+
- Pages 6, 7, 10: Treatment criteria, monitoring protocols, pregnancy considerations"
|
| 237 |
+
Case17,39,M,No,Positive,Negative,4800,250,F1,Marked,No,,,No,No,,True,"Eligibility and Rationale:
|
| 238 |
+
- Eligible: HBV DNA > 2,000 IU/mL and ALT > ULN, regardless of HBeAg status (Grade A) [SASLT 2021, Page 6]
|
| 239 |
+
- ALT > 2x ULN supports treatment initiation (Grade B) [SASLT 2021, Page 6]
|
| 240 |
+
|
| 241 |
+
Treatment Recommendations:
|
| 242 |
+
- Start monotherapy with ETV, TDF, or TAF (Grade A) [SASLT 2021, Page 8]
|
| 243 |
+
|
| 244 |
+
Monitoring and Follow-up:
|
| 245 |
+
- Monitor treatment response per SASLT protocol every 3-6 months (Grade B) [SASLT 2021, Page 7]
|
| 246 |
+
|
| 247 |
+
Special Considerations:
|
| 248 |
+
- None applicable to this patient [SASLT 2021, Pages 6-10]
|
| 249 |
+
|
| 250 |
+
References:
|
| 251 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 252 |
+
Case18,49,F,No,Positive,Negative,2300,37,F1,Mild,No,,,Yes (mother cirrhosis),No,,True,"Eligibility and Rationale:
|
| 253 |
+
- Eligible: HBV DNA > 2,000 IU/mL, ALT > ULN, and family history of HCC or cirrhosis (Grade D) [SASLT 2021, Page 6].
|
| 254 |
+
|
| 255 |
+
Treatment Recommendations:
|
| 256 |
+
- Start monotherapy with ETV, TDF, or TAF (Grade A) [SASLT 2021, Page 8].
|
| 257 |
+
|
| 258 |
+
Monitoring and Follow-up:
|
| 259 |
+
- Monitor treatment response per SASLT protocol, including HBV DNA and ALT levels every 3-6 months [SASLT 2021, Page 7].
|
| 260 |
+
|
| 261 |
+
Special Considerations:
|
| 262 |
+
- Family history of HCC or cirrhosis increases the need for treatment to reduce long-term risks [SASLT 2021, Page 6].
|
| 263 |
+
|
| 264 |
+
References:
|
| 265 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 266 |
+
Case19,56,M,No,Positive,Negative,10,30,F4 (compensated),Mild,No,,,No,No,,True,"Eligibility and Rationale:
|
| 267 |
+
- Eligible: Patient has cirrhosis (F4 fibrosis) with detectable HBV DNA, regardless of ALT levels (Grade A) [SASLT 2021, Page 6].
|
| 268 |
+
|
| 269 |
+
Treatment Recommendations:
|
| 270 |
+
- Start monotherapy with ETV, TDF, or TAF as first-line treatment (Grade A) [SASLT 2021, Page 8].
|
| 271 |
+
|
| 272 |
+
Monitoring and Follow-up:
|
| 273 |
+
- Monitor treatment response per SASLT protocol every 3-6 months (Grade A) [SASLT 2021, Page 7].
|
| 274 |
+
|
| 275 |
+
Special Considerations:
|
| 276 |
+
- None applicable to this patient.
|
| 277 |
+
|
| 278 |
+
References:
|
| 279 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 280 |
+
Case20,41,F,No,Positive (10 years),Negative,1900,28,F1,Mild,No,,,No,No,,False,"Eligibility and Rationale:
|
| 281 |
+
- Not eligible: HBV DNA < 2,000 IU/mL, ALT ≤ ULN, no significant fibrosis or necroinflammation (Grade A) [SASLT 2021, Page 6]
|
| 282 |
+
|
| 283 |
+
Treatment Recommendations:
|
| 284 |
+
- Treatment not indicated at this time [SASLT 2021, Page 6]
|
| 285 |
+
|
| 286 |
+
Monitoring and Follow-up:
|
| 287 |
+
- Monitor every 6-12 months (HBeAg-negative, HBV DNA < 2,000 IU/mL) (Grade B) [SASLT 2021, Page 7]
|
| 288 |
+
|
| 289 |
+
Special Considerations:
|
| 290 |
+
- None applicable to this patient [SASLT 2021, Page 6]
|
| 291 |
+
|
| 292 |
+
References:
|
| 293 |
+
- Pages 6, 7: Treatment criteria, monitoring protocols"
|
| 294 |
+
Case21,56,F,No,Positive,Negative,0,20,F4 (compensated),Mild,No,,,No,No,,True,"Eligibility and Rationale:
|
| 295 |
+
- Eligible: Patient has cirrhosis (F4 fibrosis) with detectable HBsAg, regardless of HBV DNA and ALT levels (Grade A) [SASLT 2021, Page 6].
|
| 296 |
+
|
| 297 |
+
Treatment Recommendations:
|
| 298 |
+
- Start monotherapy with ETV, TDF, or TAF as the preferred treatment options (Grade A) [SASLT 2021, Page 8].
|
| 299 |
+
|
| 300 |
+
Monitoring and Follow-up:
|
| 301 |
+
- Monitor treatment response per SASLT protocol, including HBV DNA, ALT, and fibrosis assessment [SASLT 2021, Page 7].
|
| 302 |
+
|
| 303 |
+
Special Considerations:
|
| 304 |
+
- None applicable as the patient has no coinfections, immunosuppression, or other special conditions [SASLT 2021, Pages 6-10].
|
| 305 |
+
|
| 306 |
+
References:
|
| 307 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 308 |
+
Case22,70,M,No,Positive (30 years),Negative,1800,27,F2,Mild,No,,,No,Yes (smoker),Hypertension,False,"Eligibility and Rationale:
|
| 309 |
+
- Not eligible: HBV DNA < 2,000 IU/mL, ALT ≤ ULN, moderate fibrosis but does not meet treatment thresholds [SASLT 2021, Page 6].
|
| 310 |
+
|
| 311 |
+
Treatment Recommendations:
|
| 312 |
+
- Treatment not indicated at this time [SASLT 2021, Page 6].
|
| 313 |
+
|
| 314 |
+
Monitoring and Follow-up:
|
| 315 |
+
- Monitor every 6-12 months (HBeAg-negative, HBV DNA < 2,000 IU/mL) (Grade B) [SASLT 2021, Page 7].
|
| 316 |
+
|
| 317 |
+
References:
|
| 318 |
+
- Pages 6, 7: Treatment criteria, monitoring protocols"
|
| 319 |
+
Case23,33,F,Yes (12 weeks),Positive,Positive,300000,40,F1,Mild,No,,,No,No,,True,"Eligibility and Rationale:
|
| 320 |
+
- Eligible: HBV DNA > 100,000 IU/mL during pregnancy warrants antiviral prophylaxis (Grade D) [SASLT 2021, Page 10]
|
| 321 |
+
- ALT > ULN and HBeAg-positive status further support treatment initiation (Grade A) [SASLT 2021, Page 6]
|
| 322 |
+
|
| 323 |
+
Treatment Recommendations:
|
| 324 |
+
- Start antiviral prophylaxis with TDF or TAF at 24-28 weeks of pregnancy (Grade D) [SASLT 2021, Page 10]
|
| 325 |
+
- Preferred regimens include TDF or TAF as monotherapy (Grade A) [SASLT 2021, Page 8]
|
| 326 |
+
|
| 327 |
+
Monitoring and Follow-up:
|
| 328 |
+
- Monitor HBV DNA and ALT levels every 3 months during treatment (Grade D) [SASLT 2021, Page 7]
|
| 329 |
+
- Postpartum follow-up to assess treatment continuation or cessation [SASLT 2021, Page 10]
|
| 330 |
+
|
| 331 |
+
Special Considerations:
|
| 332 |
+
- Breastfeeding is permitted while on TDF therapy (Grade B) [SASLT 2021, Page 10]
|
| 333 |
+
|
| 334 |
+
References:
|
| 335 |
+
- Pages 6, 7, 8, 10: Treatment criteria, drugs, monitoring, pregnancy considerations"
|
| 336 |
+
Case24,46,M,No,Positive (8 years),Negative,50000,48,F2,Moderate,Yes (vasculitis),,,No,No,,True,"Eligibility and Rationale:
|
| 337 |
+
- Eligible: HBV DNA > 2,000 IU/mL, ALT > ULN, moderate fibrosis (Grade A) [SASLT 2021, Page 6]
|
| 338 |
+
- Extrahepatic manifestations further support treatment initiation (Grade D) [SASLT 2021, Page 6]
|
| 339 |
+
|
| 340 |
+
Treatment Recommendations:
|
| 341 |
+
- Start monotherapy with ETV, TDF, or TAF (Grade A) [SASLT 2021, Page 8]
|
| 342 |
+
|
| 343 |
+
Monitoring and Follow-up:
|
| 344 |
+
- Monitor treatment response every 3-6 months per SASLT protocol (Grade B) [SASLT 2021, Page 7]
|
| 345 |
+
|
| 346 |
+
Special Considerations:
|
| 347 |
+
- None applicable to this patient.
|
| 348 |
+
|
| 349 |
+
References:
|
| 350 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 351 |
+
Case25,58,F,No,Positive,Negative,2000,60,F2,Moderate,No,,,No,No,CKD stage 3,True,"Eligibility and Rationale:
|
| 352 |
+
- Eligible: HBV DNA > 2,000 IU/mL, ALT > ULN, moderate fibrosis (Grade A) [SASLT 2021, Page 6]
|
| 353 |
+
- Moderate necroinflammation (A2) and fibrosis (F2-F3) support treatment initiation (Grade A) [SASLT 2021, Page 6]
|
| 354 |
+
|
| 355 |
+
Treatment Recommendations:
|
| 356 |
+
- Start monotherapy with ETV, TDF, or TAF (Grade A) [SASLT 2021, Page 8]
|
| 357 |
+
|
| 358 |
+
Monitoring and Follow-up:
|
| 359 |
+
- Monitor treatment response every 3-6 months per SASLT protocol [SASLT 2021, Page 7]
|
| 360 |
+
- Assess HBV DNA, ALT, and fibrosis progression during follow-up [SASLT 2021, Page 7]
|
| 361 |
+
|
| 362 |
+
Special Considerations:
|
| 363 |
+
- CKD stage 3: Prefer TAF over TDF due to better renal safety profile (Grade A) [SASLT 2021, Page 8]
|
| 364 |
+
|
| 365 |
+
References:
|
| 366 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 367 |
+
Case26,29,M,No,Positive,Positive,2500000,33,F0,,No,,,No,No,,False,"Eligibility and Rationale:
|
| 368 |
+
- Not eligible: HBV DNA > 2,000 IU/mL but ALT ≤ ULN and no significant fibrosis or necroinflammation [SASLT 2021, Page 6].
|
| 369 |
+
|
| 370 |
+
Treatment Recommendations:
|
| 371 |
+
- Treatment not indicated at this time [SASLT 2021, Page 6].
|
| 372 |
+
|
| 373 |
+
Monitoring and Follow-up:
|
| 374 |
+
- Monitor every 3-6 months (HBeAg-positive, HBV DNA > 2,000 IU/mL, ALT ≤ ULN) (Grade B) [SASLT 2021, Page 7].
|
| 375 |
+
|
| 376 |
+
Special Considerations:
|
| 377 |
+
- None applicable to this patient.
|
| 378 |
+
|
| 379 |
+
References:
|
| 380 |
+
- Pages 6, 7: Treatment criteria, monitoring protocols"
|
| 381 |
+
Case27,54,M,No,Positive,Negative,750,18,F4 (compensated),Mild,No,,,No,No,,True,"Eligibility and Rationale:
|
| 382 |
+
- Eligible: Patient has cirrhosis (F4 fibrosis) with detectable HBV DNA, regardless of ALT levels (Grade A) [SASLT 2021, Page 6].
|
| 383 |
+
|
| 384 |
+
Treatment Recommendations:
|
| 385 |
+
- Start monotherapy with ETV, TDF, or TAF as the preferred treatment options (Grade A) [SASLT 2021, Page 8].
|
| 386 |
+
|
| 387 |
+
Monitoring and Follow-up:
|
| 388 |
+
- Monitor treatment response per SASLT protocol, including HBV DNA and ALT levels every 3-6 months [SASLT 2021, Page 7].
|
| 389 |
+
|
| 390 |
+
Special Considerations:
|
| 391 |
+
- None applicable as the patient has no coinfections, immunosuppression, or other special conditions [SASLT 2021, Pages 6-10].
|
| 392 |
+
|
| 393 |
+
References:
|
| 394 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 395 |
+
Case28,38,F,No,Positive,Negative,6000,80,F2,Moderate,No,,,No,No,Obesity,True,"Eligibility and Rationale:
|
| 396 |
+
- Eligible: HBV DNA > 2,000 IU/mL, ALT > ULN, and moderate fibrosis (Grade A) [SASLT 2021, Page 6]
|
| 397 |
+
- Necroinflammatory activity (A2) and fibrosis stage (F2-F3) further support treatment initiation (Grade A) [SASLT 2021, Page 6]
|
| 398 |
+
|
| 399 |
+
Treatment Recommendations:
|
| 400 |
+
- Start monotherapy with ETV, TDF, or TAF as first-line treatment (Grade A) [SASLT 2021, Page 8]
|
| 401 |
+
|
| 402 |
+
Monitoring and Follow-up:
|
| 403 |
+
- Monitor treatment response per SASLT protocol, including HBV DNA and ALT levels every 3-6 months initially (Grade A) [SASLT 2021, Page 7]
|
| 404 |
+
- Assess fibrosis progression and treatment adherence regularly (Grade A) [SASLT 2021, Page 7]
|
| 405 |
+
|
| 406 |
+
Special Considerations:
|
| 407 |
+
- Obesity may impact liver disease progression; consider lifestyle interventions alongside antiviral therapy [SASLT 2021, Page 6]
|
| 408 |
+
|
| 409 |
+
References:
|
| 410 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
| 411 |
+
Case29,42,M,No,Positive,Negative,1200,25,F1,Mild,No,,,No,No,,False,"Eligibility and Rationale:
|
| 412 |
+
- Not eligible: HBV DNA < 2,000 IU/mL, ALT ≤ ULN, no significant fibrosis or necroinflammation (Grade A) [SASLT 2021, Page 6].
|
| 413 |
+
|
| 414 |
+
Treatment Recommendations:
|
| 415 |
+
- Treatment not indicated at this time [SASLT 2021, Page 6].
|
| 416 |
+
|
| 417 |
+
Monitoring and Follow-up:
|
| 418 |
+
- Monitor every 6-12 months (HBeAg-negative, HBV DNA < 2,000 IU/mL) (Grade B) [SASLT 2021, Page 7].
|
| 419 |
+
|
| 420 |
+
References:
|
| 421 |
+
- Pages 6, 7: Treatment criteria, monitoring protocols"
|
| 422 |
+
Case30,25,F,No,Positive (2 years),Positive,12000000,95,F1,Moderate,No,,,No,No,,True,"Eligibility and Rationale:
|
| 423 |
+
- Eligible: HBV DNA > 20,000 IU/mL and ALT > 2xULN, regardless of fibrosis stage (Grade B) [SASLT 2021, Page 6]
|
| 424 |
+
- Moderate necroinflammatory activity (A2) supports treatment initiation (Grade A) [SASLT 2021, Page 6]
|
| 425 |
+
|
| 426 |
+
Treatment Recommendations:
|
| 427 |
+
- Start monotherapy with ETV, TDF, or TAF (Grade A) [SASLT 2021, Page 8]
|
| 428 |
+
|
| 429 |
+
Monitoring and Follow-up:
|
| 430 |
+
- Monitor treatment response per SASLT protocol every 3-6 months (Grade B) [SASLT 2021, Page 7]
|
| 431 |
+
|
| 432 |
+
Special Considerations:
|
| 433 |
+
- None applicable to this patient [SASLT 2021, Pages 6-10]
|
| 434 |
+
|
| 435 |
+
References:
|
| 436 |
+
- Pages 6, 7, 8: Treatment criteria, drugs, monitoring protocols"
|
INTEGRATION_SUMMARY.md
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# LlamaParse Integration Summary
|
| 2 |
+
|
| 3 |
+
## Changes Made
|
| 4 |
+
|
| 5 |
+
### 1. **core/data_loaders.py** - Complete Replacement
|
| 6 |
+
**Status**: ✅ Complete
|
| 7 |
+
|
| 8 |
+
**Changes**:
|
| 9 |
+
- ❌ Removed: `PyMuPDF4LLMLoader` and `TesseractBlobParser`
|
| 10 |
+
- ✅ Added: `LlamaParse` and `SimpleDirectoryReader` from llama-index
|
| 11 |
+
- ✅ Added: `os` module for environment variable handling
|
| 12 |
+
|
| 13 |
+
**New Functions**:
|
| 14 |
+
1. `load_pdf_documents(pdf_path, api_key=None)` - Basic LlamaParse loader
|
| 15 |
+
2. `load_pdf_documents_advanced(pdf_path, api_key=None, premium_mode=False)` - Advanced loader with premium features
|
| 16 |
+
3. `load_multiple_pdfs(pdf_directory, api_key=None, file_pattern="*.pdf")` - Batch processing
|
| 17 |
+
|
| 18 |
+
**Key Features**:
|
| 19 |
+
- Parsing instructions optimized for medical documents
|
| 20 |
+
- Accurate page numbering with `split_by_page=True`
|
| 21 |
+
- Preserves borderless tables and complex layouts
|
| 22 |
+
- Enhanced metadata tracking
|
| 23 |
+
- Premium mode option for GPT-4o parsing
|
| 24 |
+
|
| 25 |
+
---
|
| 26 |
+
|
| 27 |
+
### 2. **core/config.py** - Configuration Updates
|
| 28 |
+
**Status**: ✅ Complete
|
| 29 |
+
|
| 30 |
+
**Changes**:
|
| 31 |
+
```python
|
| 32 |
+
# Added to Settings class
|
| 33 |
+
LLAMA_CLOUD_API_KEY: str | None = None
|
| 34 |
+
LLAMA_PREMIUM_MODE: bool = False
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
**Purpose**:
|
| 38 |
+
- Store LlamaParse API key from environment variables
|
| 39 |
+
- Control premium/basic parsing mode
|
| 40 |
+
- Centralized configuration management
|
| 41 |
+
|
| 42 |
+
---
|
| 43 |
+
|
| 44 |
+
### 3. **core/utils.py** - Pipeline Integration
|
| 45 |
+
**Status**: ✅ Complete
|
| 46 |
+
|
| 47 |
+
**Changes**:
|
| 48 |
+
1. **Import Update** (Line 12):
|
| 49 |
+
```python
|
| 50 |
+
from .config import get_embedding_model, VECTOR_STORE_DIR, CHUNKS_PATH, NEW_DATA, PROCESSED_DATA, settings
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
2. **Function Update** `_load_documents_for_file()` (Lines 118-141):
|
| 54 |
+
```python
|
| 55 |
+
def _load_documents_for_file(file_path: Path) -> List[Document]:
|
| 56 |
+
try:
|
| 57 |
+
if file_path.suffix.lower() == '.pdf':
|
| 58 |
+
# Use advanced LlamaParse loader with settings from config
|
| 59 |
+
api_key = settings.LLAMA_CLOUD_API_KEY
|
| 60 |
+
premium_mode = settings.LLAMA_PREMIUM_MODE
|
| 61 |
+
|
| 62 |
+
return data_loaders.load_pdf_documents_advanced(
|
| 63 |
+
file_path,
|
| 64 |
+
api_key=api_key,
|
| 65 |
+
premium_mode=premium_mode
|
| 66 |
+
)
|
| 67 |
+
return data_loaders.load_markdown_documents(file_path)
|
| 68 |
+
except Exception as e:
|
| 69 |
+
logger.error(f"Failed to load {file_path}: {e}")
|
| 70 |
+
return []
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
**Impact**:
|
| 74 |
+
- All PDF processing now uses LlamaParse automatically
|
| 75 |
+
- Reads configuration from environment variables
|
| 76 |
+
- Maintains backward compatibility with markdown files
|
| 77 |
+
|
| 78 |
+
---
|
| 79 |
+
|
| 80 |
+
## New Files Created
|
| 81 |
+
|
| 82 |
+
### 1. **LLAMAPARSE_INTEGRATION.md**
|
| 83 |
+
Complete documentation including:
|
| 84 |
+
- Setup instructions
|
| 85 |
+
- Configuration guide
|
| 86 |
+
- Usage examples
|
| 87 |
+
- Cost considerations
|
| 88 |
+
- Troubleshooting
|
| 89 |
+
- Migration guide
|
| 90 |
+
|
| 91 |
+
### 2. **test_llamaparse.py**
|
| 92 |
+
Test suite with:
|
| 93 |
+
- Configuration checker
|
| 94 |
+
- Single PDF test
|
| 95 |
+
- Batch processing test
|
| 96 |
+
- Full pipeline test
|
| 97 |
+
|
| 98 |
+
### 3. **INTEGRATION_SUMMARY.md** (this file)
|
| 99 |
+
Quick reference for all changes
|
| 100 |
+
|
| 101 |
+
---
|
| 102 |
+
|
| 103 |
+
## Environment Variables Required
|
| 104 |
+
|
| 105 |
+
Add to your `.env` file:
|
| 106 |
+
|
| 107 |
+
```env
|
| 108 |
+
# Required for LlamaParse
|
| 109 |
+
LLAMA_CLOUD_API_KEY=llx-your-api-key-here
|
| 110 |
+
|
| 111 |
+
# Optional: Enable premium mode (default: False)
|
| 112 |
+
LLAMA_PREMIUM_MODE=False
|
| 113 |
+
|
| 114 |
+
# Existing (still required)
|
| 115 |
+
OPENAI_API_KEY=your-openai-key
|
| 116 |
+
```
|
| 117 |
+
|
| 118 |
+
---
|
| 119 |
+
|
| 120 |
+
## Installation Requirements
|
| 121 |
+
|
| 122 |
+
```bash
|
| 123 |
+
pip install llama-parse llama-index-core
|
| 124 |
+
```
|
| 125 |
+
|
| 126 |
+
---
|
| 127 |
+
|
| 128 |
+
## How to Use
|
| 129 |
+
|
| 130 |
+
### Automatic Processing (Recommended)
|
| 131 |
+
1. Set `LLAMA_CLOUD_API_KEY` in `.env`
|
| 132 |
+
2. Place PDFs in `data/new_data/PROVIDER/`
|
| 133 |
+
3. Run your application - documents are processed automatically on startup
|
| 134 |
+
|
| 135 |
+
### Manual Processing
|
| 136 |
+
```python
|
| 137 |
+
from core.utils import process_new_data_and_update_vector_store
|
| 138 |
+
|
| 139 |
+
# Process all new documents
|
| 140 |
+
vector_store = process_new_data_and_update_vector_store()
|
| 141 |
+
```
|
| 142 |
+
|
| 143 |
+
### Direct PDF Loading
|
| 144 |
+
```python
|
| 145 |
+
from pathlib import Path
|
| 146 |
+
from core.data_loaders import load_pdf_documents_advanced
|
| 147 |
+
|
| 148 |
+
pdf_path = Path("data/new_data/SASLT/guideline.pdf")
|
| 149 |
+
documents = load_pdf_documents_advanced(pdf_path)
|
| 150 |
+
```
|
| 151 |
+
|
| 152 |
+
---
|
| 153 |
+
|
| 154 |
+
## Testing
|
| 155 |
+
|
| 156 |
+
Run the test suite:
|
| 157 |
+
```bash
|
| 158 |
+
python test_llamaparse.py
|
| 159 |
+
```
|
| 160 |
+
|
| 161 |
+
This will:
|
| 162 |
+
1. ✅ Check configuration
|
| 163 |
+
2. ✅ Test single PDF loading
|
| 164 |
+
3. ✅ (Optional) Test batch processing
|
| 165 |
+
4. ✅ (Optional) Test full pipeline
|
| 166 |
+
|
| 167 |
+
---
|
| 168 |
+
|
| 169 |
+
## Backward Compatibility
|
| 170 |
+
|
| 171 |
+
✅ **Fully backward compatible**:
|
| 172 |
+
- Existing processed documents remain valid
|
| 173 |
+
- Vector store continues to work
|
| 174 |
+
- Markdown processing unchanged
|
| 175 |
+
- No breaking changes to API
|
| 176 |
+
|
| 177 |
+
---
|
| 178 |
+
|
| 179 |
+
## Benefits
|
| 180 |
+
|
| 181 |
+
| Aspect | Before (PyMuPDF4LLMLoader) | After (LlamaParse) |
|
| 182 |
+
|--------|---------------------------|-------------------|
|
| 183 |
+
| **Borderless Tables** | ❌ Poor | ✅ Excellent |
|
| 184 |
+
| **Complex Layouts** | ⚠️ Moderate | ✅ Excellent |
|
| 185 |
+
| **Medical Terminology** | ⚠️ Moderate | ✅ Excellent |
|
| 186 |
+
| **Page Numbering** | ✅ Good | ✅ Excellent |
|
| 187 |
+
| **Processing Speed** | ✅ Fast (local) | ⚠️ Slower (cloud) |
|
| 188 |
+
| **Cost** | ✅ Free | ⚠️ ~$0.003-0.01/page |
|
| 189 |
+
| **Accuracy** | ⚠️ Moderate | ✅ High |
|
| 190 |
+
|
| 191 |
+
---
|
| 192 |
+
|
| 193 |
+
## Cost Estimation
|
| 194 |
+
|
| 195 |
+
### Basic Mode (~$0.003/page)
|
| 196 |
+
- 50-page guideline: ~$0.15
|
| 197 |
+
- 100-page guideline: ~$0.30
|
| 198 |
+
|
| 199 |
+
### Premium Mode (~$0.01/page)
|
| 200 |
+
- 50-page guideline: ~$0.50
|
| 201 |
+
- 100-page guideline: ~$1.00
|
| 202 |
+
|
| 203 |
+
**Note**: LlamaParse caches results, so re-processing the same file is free.
|
| 204 |
+
|
| 205 |
+
---
|
| 206 |
+
|
| 207 |
+
## Workflow Example
|
| 208 |
+
|
| 209 |
+
```
|
| 210 |
+
1. User places PDF in data/new_data/SASLT/
|
| 211 |
+
└── new_guideline.pdf
|
| 212 |
+
|
| 213 |
+
2. Application startup triggers processing
|
| 214 |
+
├── Detects new PDF
|
| 215 |
+
├── Calls load_pdf_documents_advanced()
|
| 216 |
+
├── LlamaParse processes with medical optimizations
|
| 217 |
+
├── Extracts 50 pages with accurate metadata
|
| 218 |
+
└── Returns Document objects
|
| 219 |
+
|
| 220 |
+
3. Pipeline continues
|
| 221 |
+
├── Splits into 245 chunks
|
| 222 |
+
├── Updates vector store
|
| 223 |
+
└── Moves to data/processed_data/SASLT/new_guideline_20251111_143022.pdf
|
| 224 |
+
|
| 225 |
+
4. Ready for RAG queries
|
| 226 |
+
└── Vector store contains new guideline content
|
| 227 |
+
```
|
| 228 |
+
|
| 229 |
+
---
|
| 230 |
+
|
| 231 |
+
## Next Steps
|
| 232 |
+
|
| 233 |
+
1. ✅ Set `LLAMA_CLOUD_API_KEY` in `.env`
|
| 234 |
+
2. ✅ Install dependencies: `pip install llama-parse llama-index-core`
|
| 235 |
+
3. ✅ Test with: `python test_llamaparse.py`
|
| 236 |
+
4. ✅ Place PDFs in `data/new_data/PROVIDER/`
|
| 237 |
+
5. ✅ Run application and verify processing
|
| 238 |
+
|
| 239 |
+
---
|
| 240 |
+
|
| 241 |
+
## Support & Troubleshooting
|
| 242 |
+
|
| 243 |
+
### Common Issues
|
| 244 |
+
|
| 245 |
+
**1. API Key Not Found**
|
| 246 |
+
```
|
| 247 |
+
ValueError: LlamaCloud API key not found
|
| 248 |
+
```
|
| 249 |
+
→ Set `LLAMA_CLOUD_API_KEY` in `.env`
|
| 250 |
+
|
| 251 |
+
**2. Import Errors**
|
| 252 |
+
```
|
| 253 |
+
ModuleNotFoundError: No module named 'llama_parse'
|
| 254 |
+
```
|
| 255 |
+
→ Run: `pip install llama-parse llama-index-core`
|
| 256 |
+
|
| 257 |
+
**3. Slow Processing**
|
| 258 |
+
→ Normal for cloud processing (30-60s per document)
|
| 259 |
+
→ Subsequent runs use cache (much faster)
|
| 260 |
+
|
| 261 |
+
### Logs
|
| 262 |
+
Check `logs/app.log` for detailed processing information
|
| 263 |
+
|
| 264 |
+
---
|
| 265 |
+
|
| 266 |
+
**Integration Date**: November 11, 2025
|
| 267 |
+
**Status**: ✅ Production Ready
|
| 268 |
+
**Version**: 1.0
|
LLAMAPARSE_INTEGRATION.md
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# LlamaParse Integration Guide
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
The HBV AI Assistant now uses **LlamaParse** for advanced PDF parsing, replacing PyMuPDF4LLMLoader. LlamaParse excels at:
|
| 5 |
+
- ✅ Borderless tables (common in medical guidelines)
|
| 6 |
+
- ✅ Complex document layouts
|
| 7 |
+
- ✅ Hierarchical section preservation
|
| 8 |
+
- ✅ Accurate page numbering
|
| 9 |
+
- ✅ Medical terminology and dosage tables
|
| 10 |
+
|
| 11 |
+
## Setup
|
| 12 |
+
|
| 13 |
+
### 1. Install Required Packages
|
| 14 |
+
```bash
|
| 15 |
+
pip install llama-parse llama-index-core
|
| 16 |
+
```
|
| 17 |
+
|
| 18 |
+
### 2. Get Your API Key
|
| 19 |
+
1. Visit: https://cloud.llamaindex.ai/api-key
|
| 20 |
+
2. Sign up/login and generate an API key
|
| 21 |
+
3. Copy your API key (format: `llx-...`)
|
| 22 |
+
|
| 23 |
+
### 3. Configure Environment Variables
|
| 24 |
+
Add to your `.env` file:
|
| 25 |
+
|
| 26 |
+
```env
|
| 27 |
+
# Required: LlamaParse API Key
|
| 28 |
+
LLAMA_CLOUD_API_KEY=llx-your-api-key-here
|
| 29 |
+
|
| 30 |
+
# Optional: Enable premium GPT-4o mode (higher accuracy, costs more)
|
| 31 |
+
LLAMA_PREMIUM_MODE=False
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
## How It Works
|
| 35 |
+
|
| 36 |
+
### Automatic Processing Pipeline
|
| 37 |
+
When you process new documents from `data/new_data/`, the system automatically:
|
| 38 |
+
|
| 39 |
+
1. **Detects PDF files** in `data/new_data/PROVIDER/` directories
|
| 40 |
+
2. **Uses LlamaParse** with medical document optimizations:
|
| 41 |
+
- Preserves table structures (including borderless tables)
|
| 42 |
+
- Maintains hierarchical headings
|
| 43 |
+
- Extracts dosage information accurately
|
| 44 |
+
- Keeps reference citations intact
|
| 45 |
+
3. **Splits by page** for accurate page numbering
|
| 46 |
+
4. **Extracts metadata**: provider, disease, page numbers
|
| 47 |
+
5. **Updates vector store** for RAG queries
|
| 48 |
+
|
| 49 |
+
### Configuration Options
|
| 50 |
+
|
| 51 |
+
#### Basic Mode (Default)
|
| 52 |
+
```env
|
| 53 |
+
# In .env
|
| 54 |
+
LLAMA_CLOUD_API_KEY=llx-your-key
|
| 55 |
+
LLAMA_PREMIUM_MODE=False
|
| 56 |
+
```
|
| 57 |
+
- Uses standard LlamaParse parsing
|
| 58 |
+
- Good accuracy for most medical documents
|
| 59 |
+
- Lower cost
|
| 60 |
+
|
| 61 |
+
#### Premium Mode
|
| 62 |
+
```env
|
| 63 |
+
# In .env
|
| 64 |
+
LLAMA_CLOUD_API_KEY=llx-your-key
|
| 65 |
+
LLAMA_PREMIUM_MODE=True
|
| 66 |
+
```
|
| 67 |
+
- Uses GPT-4o for parsing
|
| 68 |
+
- Highest accuracy for complex tables
|
| 69 |
+
- Higher cost per page
|
| 70 |
+
- Recommended for critical medical guidelines
|
| 71 |
+
|
| 72 |
+
## Usage
|
| 73 |
+
|
| 74 |
+
### Processing New Documents
|
| 75 |
+
|
| 76 |
+
1. **Place PDFs** in the appropriate directory:
|
| 77 |
+
```
|
| 78 |
+
data/new_data/SASLT/guideline.pdf
|
| 79 |
+
data/new_data/WHO/recommendations.pdf
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
2. **Run the processing** (automatic on app startup or manually):
|
| 83 |
+
```python
|
| 84 |
+
from core.utils import process_new_data_and_update_vector_store
|
| 85 |
+
|
| 86 |
+
# Process all new documents
|
| 87 |
+
vector_store = process_new_data_and_update_vector_store()
|
| 88 |
+
```
|
| 89 |
+
|
| 90 |
+
3. **Files are automatically moved** to `data/processed_data/` after successful processing
|
| 91 |
+
|
| 92 |
+
### Manual PDF Loading
|
| 93 |
+
|
| 94 |
+
You can also load PDFs manually:
|
| 95 |
+
|
| 96 |
+
```python
|
| 97 |
+
from pathlib import Path
|
| 98 |
+
from core.data_loaders import load_pdf_documents_advanced
|
| 99 |
+
|
| 100 |
+
# Basic usage (reads API key from environment)
|
| 101 |
+
pdf_path = Path("data/new_data/SASLT/guideline.pdf")
|
| 102 |
+
documents = load_pdf_documents_advanced(pdf_path)
|
| 103 |
+
|
| 104 |
+
# With explicit API key and premium mode enabled
|
| 105 |
+
documents = load_pdf_documents_advanced(
|
| 106 |
+
pdf_path,
|
| 107 |
+
api_key="llx-your-key-here",
|
| 108 |
+
premium_mode=True
|
| 109 |
+
)
|
| 110 |
+
|
| 111 |
+
# Batch processing
|
| 112 |
+
from core.data_loaders import load_multiple_pdfs
|
| 113 |
+
|
| 114 |
+
pdf_dir = Path("data/new_data/SASLT")
|
| 115 |
+
all_documents = load_multiple_pdfs(pdf_dir)
|
| 116 |
+
```
|
| 117 |
+
|
| 118 |
+
## Document Metadata
|
| 119 |
+
|
| 120 |
+
Each processed document includes:
|
| 121 |
+
|
| 122 |
+
```python
|
| 123 |
+
{
|
| 124 |
+
"source": "SASLT_2021.pdf",
|
| 125 |
+
"disease": "HBV",
|
| 126 |
+
"provider": "SASLT",
|
| 127 |
+
"page_number": 6,
|
| 128 |
+
"document_index": 5,
|
| 129 |
+
"parser": "llamaparse",
|
| 130 |
+
"premium_mode": False
|
| 131 |
+
}
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
## Parsing Instructions
|
| 135 |
+
|
| 136 |
+
LlamaParse is configured with medical-specific instructions:
|
| 137 |
+
|
| 138 |
+
### Basic Mode
|
| 139 |
+
```
|
| 140 |
+
"This is a medical guideline document.
|
| 141 |
+
Pay special attention to tables (including borderless tables),
|
| 142 |
+
clinical recommendations, dosage information, and reference citations.
|
| 143 |
+
Preserve table structure and maintain hierarchical headings."
|
| 144 |
+
```
|
| 145 |
+
|
| 146 |
+
### Premium Mode
|
| 147 |
+
```
|
| 148 |
+
"Medical guideline document with complex tables. Instructions:
|
| 149 |
+
0. Keep the original text intact without changing anything
|
| 150 |
+
1. Preserve all table structures, especially borderless tables
|
| 151 |
+
2. Maintain hierarchical organization of sections and subsections
|
| 152 |
+
3. Keep dosage tables and treatment algorithms intact
|
| 153 |
+
4. Preserve reference numbers and citations
|
| 154 |
+
5. Identify and mark clinical recommendation levels
|
| 155 |
+
6. Extract figures and their captions accurately"
|
| 156 |
+
```
|
| 157 |
+
|
| 158 |
+
## Cost Considerations
|
| 159 |
+
|
| 160 |
+
- **Basic Mode**: ~$0.003 per page
|
| 161 |
+
- **Premium Mode**: ~$0.01 per page (GPT-4o)
|
| 162 |
+
- **Caching**: LlamaParse caches results, so re-processing the same file is free
|
| 163 |
+
|
| 164 |
+
### Cost Estimation
|
| 165 |
+
For a 50-page medical guideline:
|
| 166 |
+
- Basic: ~$0.15
|
| 167 |
+
- Premium: ~$0.50
|
| 168 |
+
|
| 169 |
+
## Troubleshooting
|
| 170 |
+
|
| 171 |
+
### API Key Not Found
|
| 172 |
+
```
|
| 173 |
+
ValueError: LlamaCloud API key not found
|
| 174 |
+
```
|
| 175 |
+
**Solution**: Set `LLAMA_CLOUD_API_KEY` in your `.env` file
|
| 176 |
+
|
| 177 |
+
### Import Errors
|
| 178 |
+
```
|
| 179 |
+
ModuleNotFoundError: No module named 'llama_parse'
|
| 180 |
+
```
|
| 181 |
+
**Solution**: Install required packages:
|
| 182 |
+
```bash
|
| 183 |
+
pip install llama-parse llama-index-core
|
| 184 |
+
```
|
| 185 |
+
|
| 186 |
+
### Slow Processing
|
| 187 |
+
- LlamaParse processes documents in the cloud
|
| 188 |
+
- First-time processing takes longer (30-60 seconds per document)
|
| 189 |
+
- Subsequent processing uses cache (much faster)
|
| 190 |
+
- Consider using `premium_mode=False` for faster processing
|
| 191 |
+
|
| 192 |
+
### Empty Results
|
| 193 |
+
- Check that PDF is not corrupted
|
| 194 |
+
- Verify API key is valid
|
| 195 |
+
- Check logs for detailed error messages
|
| 196 |
+
|
| 197 |
+
## Migration from PyMuPDF4LLMLoader
|
| 198 |
+
|
| 199 |
+
The integration is **backward compatible**:
|
| 200 |
+
- Existing processed documents remain valid
|
| 201 |
+
- Vector store continues to work
|
| 202 |
+
- Only new documents use LlamaParse
|
| 203 |
+
- No changes needed to existing code
|
| 204 |
+
|
| 205 |
+
### What Changed
|
| 206 |
+
1. **`core/data_loaders.py`**: Replaced PyMuPDF4LLMLoader with LlamaParse
|
| 207 |
+
2. **`core/config.py`**: Added `LLAMA_CLOUD_API_KEY` and `LLAMA_PREMIUM_MODE` settings
|
| 208 |
+
3. **`core/utils.py`**: Updated `_load_documents_for_file()` to use `load_pdf_documents_advanced()`
|
| 209 |
+
|
| 210 |
+
## Benefits Over PyMuPDF4LLMLoader
|
| 211 |
+
|
| 212 |
+
| Feature | PyMuPDF4LLMLoader | LlamaParse |
|
| 213 |
+
|---------|-------------------|------------|
|
| 214 |
+
| Borderless tables | ❌ Poor | ✅ Excellent |
|
| 215 |
+
| Complex layouts | ⚠️ Moderate | ✅ Excellent |
|
| 216 |
+
| Medical terminology | ⚠️ Moderate | ✅ Excellent |
|
| 217 |
+
| Page numbering | ✅ Good | ✅ Excellent |
|
| 218 |
+
| Processing speed | ✅ Fast (local) | ⚠️ Slower (cloud) |
|
| 219 |
+
| Cost | ✅ Free | ⚠️ Paid API |
|
| 220 |
+
| Accuracy | ⚠️ Moderate | ✅ High |
|
| 221 |
+
|
| 222 |
+
## Example Workflow
|
| 223 |
+
|
| 224 |
+
```python
|
| 225 |
+
# 1. Set up environment
|
| 226 |
+
# Add to .env:
|
| 227 |
+
# LLAMA_CLOUD_API_KEY=llx-your-key-here
|
| 228 |
+
# LLAMA_PREMIUM_MODE=False
|
| 229 |
+
|
| 230 |
+
# 2. Place new PDFs
|
| 231 |
+
# data/new_data/SASLT/new_guideline.pdf
|
| 232 |
+
|
| 233 |
+
# 3. Process automatically (on app startup)
|
| 234 |
+
# Or manually:
|
| 235 |
+
from core.utils import process_new_data_and_update_vector_store
|
| 236 |
+
|
| 237 |
+
vector_store = process_new_data_and_update_vector_store()
|
| 238 |
+
# Output:
|
| 239 |
+
# ✅ Parsing PDF with LlamaParse (Premium: False): new_guideline.pdf
|
| 240 |
+
# ✅ Loaded 50 pages from PDF: new_guideline.pdf
|
| 241 |
+
# ✅ Split 50 documents into 245 chunks
|
| 242 |
+
# ✅ Added 245 new chunks to existing vector store
|
| 243 |
+
# 📦 Moved processed file: new_guideline.pdf -> SASLT/new_guideline_20251111_143022.pdf
|
| 244 |
+
|
| 245 |
+
# 4. Query the system
|
| 246 |
+
from core.agent import answer_question
|
| 247 |
+
|
| 248 |
+
response = answer_question(
|
| 249 |
+
"What is the recommended treatment for HBeAg-positive chronic hepatitis B?"
|
| 250 |
+
)
|
| 251 |
+
print(response)
|
| 252 |
+
```
|
| 253 |
+
|
| 254 |
+
## Support
|
| 255 |
+
|
| 256 |
+
For issues or questions:
|
| 257 |
+
1. Check the logs in `logs/app.log`
|
| 258 |
+
2. Verify API key is valid
|
| 259 |
+
3. Review LlamaParse documentation: https://docs.llamaindex.ai/en/stable/llama_cloud/llama_parse/
|
| 260 |
+
4. Check environment variables are set correctly
|
| 261 |
+
|
| 262 |
+
---
|
| 263 |
+
|
| 264 |
+
**Last Updated**: November 11, 2025
|
| 265 |
+
**Integration Status**: ✅ Complete and Production Ready
|
core/config.py
CHANGED
|
@@ -17,6 +17,10 @@ class Settings(BaseSettings):
|
|
| 17 |
|
| 18 |
OPENAI_API_KEY: str
|
| 19 |
OPENAI_BASE_URL: str | None = None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
|
| 22 |
DATA_DIR: str = os.getenv("DATA_DIR", "")
|
|
|
|
| 17 |
|
| 18 |
OPENAI_API_KEY: str
|
| 19 |
OPENAI_BASE_URL: str | None = None
|
| 20 |
+
|
| 21 |
+
# LlamaParse configuration for advanced PDF parsing
|
| 22 |
+
LLAMA_CLOUD_API_KEY: str | None = None
|
| 23 |
+
LLAMA_PREMIUM_MODE: bool = False # Set to True for GPT-4o parsing (costs more)
|
| 24 |
|
| 25 |
LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
|
| 26 |
DATA_DIR: str = os.getenv("DATA_DIR", "")
|
core/data_loaders.py
CHANGED
|
@@ -1,17 +1,17 @@
|
|
| 1 |
# Import required libraries
|
| 2 |
-
import
|
| 3 |
from pathlib import Path
|
| 4 |
from typing import List
|
| 5 |
from langchain.schema import Document
|
| 6 |
from .config import logger
|
| 7 |
-
from
|
| 8 |
-
from
|
| 9 |
|
| 10 |
|
| 11 |
-
def load_pdf_documents(pdf_path: Path) -> List[Document]:
|
| 12 |
"""
|
| 13 |
-
Load and process PDF documents from medical guidelines using
|
| 14 |
-
|
| 15 |
Extracts disease and provider from directory structure.
|
| 16 |
|
| 17 |
Directory structure expected: data/new_data/PROVIDER/file.pdf
|
|
@@ -19,18 +19,18 @@ def load_pdf_documents(pdf_path: Path) -> List[Document]:
|
|
| 19 |
|
| 20 |
Args:
|
| 21 |
pdf_path: Path to the PDF file
|
|
|
|
|
|
|
| 22 |
|
| 23 |
Returns:
|
| 24 |
List of Document objects with metadata (source, disease, provider, page_number)
|
| 25 |
"""
|
| 26 |
try:
|
| 27 |
-
|
| 28 |
# Validate file exists
|
| 29 |
if not pdf_path.exists():
|
| 30 |
raise FileNotFoundError(f"PDF file not found at {pdf_path}")
|
| 31 |
|
| 32 |
# Extract provider from directory structure
|
| 33 |
-
# Structure: data/new_data/PROVIDER/file.pdf
|
| 34 |
path_parts = pdf_path.parts
|
| 35 |
disease = "HBV" # Default disease for this system
|
| 36 |
provider = "unknown"
|
|
@@ -38,54 +38,225 @@ def load_pdf_documents(pdf_path: Path) -> List[Document]:
|
|
| 38 |
# Find provider: it's the parent directory of the PDF file
|
| 39 |
if len(path_parts) >= 2:
|
| 40 |
provider = path_parts[-2] # Parent directory (e.g., SASLT)
|
| 41 |
-
|
| 42 |
# If provider is 'new_data', it means file is directly in new_data folder
|
| 43 |
if provider.lower() == "new_data":
|
| 44 |
provider = "unknown"
|
| 45 |
|
| 46 |
-
#
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
)
|
| 54 |
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
|
|
|
| 57 |
documents = []
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
metadata
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
documents.append(processed_doc)
|
| 80 |
-
|
| 81 |
logger.info(f"Loaded {len(documents)} pages from PDF: {pdf_path.name} (Disease: {disease}, Provider: {provider})")
|
| 82 |
return documents
|
| 83 |
|
| 84 |
except Exception as e:
|
| 85 |
logger.error(f"Error loading PDF documents from {pdf_path}: {str(e)}")
|
| 86 |
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
def load_markdown_documents(md_path: Path) -> List[Document]:
|
| 90 |
"""
|
| 91 |
Load and process Markdown medical guidelines.
|
|
@@ -139,4 +310,5 @@ def load_markdown_documents(md_path: Path) -> List[Document]:
|
|
| 139 |
|
| 140 |
except Exception as e:
|
| 141 |
logger.error(f"Error loading Markdown document from {md_path}: {str(e)}")
|
| 142 |
-
raise
|
|
|
|
|
|
| 1 |
# Import required libraries
|
| 2 |
+
import os
|
| 3 |
from pathlib import Path
|
| 4 |
from typing import List
|
| 5 |
from langchain.schema import Document
|
| 6 |
from .config import logger
|
| 7 |
+
from llama_parse import LlamaParse
|
| 8 |
+
from llama_index.core import SimpleDirectoryReader
|
| 9 |
|
| 10 |
|
| 11 |
+
def load_pdf_documents(pdf_path: Path, api_key: str = None) -> List[Document]:
|
| 12 |
"""
|
| 13 |
+
Load and process PDF documents from medical guidelines using LlamaParse.
|
| 14 |
+
Excellent for borderless tables and complex medical document layouts.
|
| 15 |
Extracts disease and provider from directory structure.
|
| 16 |
|
| 17 |
Directory structure expected: data/new_data/PROVIDER/file.pdf
|
|
|
|
| 19 |
|
| 20 |
Args:
|
| 21 |
pdf_path: Path to the PDF file
|
| 22 |
+
api_key: LlamaCloud API key. If None, reads from LLAMA_CLOUD_API_KEY env variable
|
| 23 |
+
Get your API key from: https://cloud.llamaindex.ai/api-key
|
| 24 |
|
| 25 |
Returns:
|
| 26 |
List of Document objects with metadata (source, disease, provider, page_number)
|
| 27 |
"""
|
| 28 |
try:
|
|
|
|
| 29 |
# Validate file exists
|
| 30 |
if not pdf_path.exists():
|
| 31 |
raise FileNotFoundError(f"PDF file not found at {pdf_path}")
|
| 32 |
|
| 33 |
# Extract provider from directory structure
|
|
|
|
| 34 |
path_parts = pdf_path.parts
|
| 35 |
disease = "HBV" # Default disease for this system
|
| 36 |
provider = "unknown"
|
|
|
|
| 38 |
# Find provider: it's the parent directory of the PDF file
|
| 39 |
if len(path_parts) >= 2:
|
| 40 |
provider = path_parts[-2] # Parent directory (e.g., SASLT)
|
| 41 |
+
|
| 42 |
# If provider is 'new_data', it means file is directly in new_data folder
|
| 43 |
if provider.lower() == "new_data":
|
| 44 |
provider = "unknown"
|
| 45 |
|
| 46 |
+
# Get API key from parameter or environment variable
|
| 47 |
+
llama_api_key = api_key or os.getenv("LLAMA_CLOUD_API_KEY")
|
| 48 |
+
if not llama_api_key:
|
| 49 |
+
raise ValueError(
|
| 50 |
+
"LlamaCloud API key not found. Please provide api_key parameter or set "
|
| 51 |
+
"LLAMA_CLOUD_API_KEY environment variable. "
|
| 52 |
+
"Get your key from: https://cloud.llamaindex.ai/api-key"
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
# Initialize LlamaParse with optimized settings for medical documents
|
| 56 |
+
parser = LlamaParse(
|
| 57 |
+
api_key=llama_api_key,
|
| 58 |
+
result_type="markdown", # or "text" for plain text
|
| 59 |
+
verbose=True,
|
| 60 |
+
language="en",
|
| 61 |
+
# Medical document optimizations
|
| 62 |
+
parsing_instruction=(
|
| 63 |
+
"This is a medical guideline document. "
|
| 64 |
+
"Pay special attention to tables (including borderless tables), "
|
| 65 |
+
"clinical recommendations, dosage information, and reference citations. "
|
| 66 |
+
"Preserve table structure and maintain hierarchical headings."
|
| 67 |
+
),
|
| 68 |
+
# Advanced options for better table handling
|
| 69 |
+
invalidate_cache=False, # Use cache for faster re-processing
|
| 70 |
+
do_not_cache=False,
|
| 71 |
+
fast_mode=False, # Use deep parsing for better accuracy
|
| 72 |
+
# Split by page for proper page numbering
|
| 73 |
+
split_by_page=True, # This is the key parameter!
|
| 74 |
)
|
| 75 |
|
| 76 |
+
# Parse the PDF file
|
| 77 |
+
logger.info(f"Parsing PDF with LlamaParse: {pdf_path.name}")
|
| 78 |
+
|
| 79 |
+
# Use SimpleDirectoryReader with LlamaParse
|
| 80 |
+
file_extractor = {".pdf": parser}
|
| 81 |
+
reader = SimpleDirectoryReader(
|
| 82 |
+
input_files=[str(pdf_path)],
|
| 83 |
+
file_extractor=file_extractor
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
# Load documents - each page will be a separate document when split_by_page=True
|
| 87 |
+
llama_documents = reader.load_data()
|
| 88 |
|
| 89 |
+
# Convert to LangChain Document format
|
| 90 |
documents = []
|
| 91 |
+
|
| 92 |
+
for doc_idx, llama_doc in enumerate(llama_documents):
|
| 93 |
+
# When split_by_page=True, each llama_doc represents one page
|
| 94 |
+
# Check if page number exists in metadata, otherwise use index
|
| 95 |
+
page_num = llama_doc.metadata.get('page_number', doc_idx + 1)
|
| 96 |
+
|
| 97 |
+
processed_doc = Document(
|
| 98 |
+
page_content=llama_doc.text.strip(),
|
| 99 |
+
metadata={
|
| 100 |
+
"source": pdf_path.name,
|
| 101 |
+
"disease": disease,
|
| 102 |
+
"provider": provider,
|
| 103 |
+
"page_number": page_num,
|
| 104 |
+
"document_index": doc_idx,
|
| 105 |
+
# Preserve any additional metadata from LlamaParse
|
| 106 |
+
**{k: v for k, v in llama_doc.metadata.items()
|
| 107 |
+
if k not in ['source', 'disease', 'provider', 'page_number', 'document_index']}
|
| 108 |
+
}
|
| 109 |
+
)
|
| 110 |
+
documents.append(processed_doc)
|
| 111 |
+
|
|
|
|
|
|
|
| 112 |
logger.info(f"Loaded {len(documents)} pages from PDF: {pdf_path.name} (Disease: {disease}, Provider: {provider})")
|
| 113 |
return documents
|
| 114 |
|
| 115 |
except Exception as e:
|
| 116 |
logger.error(f"Error loading PDF documents from {pdf_path}: {str(e)}")
|
| 117 |
raise
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def load_pdf_documents_advanced(
|
| 121 |
+
pdf_path: Path,
|
| 122 |
+
api_key: str = None,
|
| 123 |
+
premium_mode: bool = False
|
| 124 |
+
) -> List[Document]:
|
| 125 |
+
"""
|
| 126 |
+
Advanced version with premium features for complex medical documents.
|
| 127 |
+
|
| 128 |
+
Args:
|
| 129 |
+
pdf_path: Path to the PDF file
|
| 130 |
+
api_key: LlamaCloud API key
|
| 131 |
+
premium_mode: Use premium GPT-4o mode for highest accuracy (costs more)
|
| 132 |
+
|
| 133 |
+
Returns:
|
| 134 |
+
List of Document objects with enhanced metadata
|
| 135 |
+
"""
|
| 136 |
+
try:
|
| 137 |
+
if not pdf_path.exists():
|
| 138 |
+
raise FileNotFoundError(f"PDF file not found at {pdf_path}")
|
| 139 |
+
|
| 140 |
+
path_parts = pdf_path.parts
|
| 141 |
+
disease = "HBV"
|
| 142 |
+
provider = path_parts[-2] if len(path_parts) >= 2 else "unknown"
|
| 143 |
+
if provider.lower() == "new_data":
|
| 144 |
+
provider = "unknown"
|
| 145 |
+
|
| 146 |
+
llama_api_key = api_key or os.getenv("LLAMA_CLOUD_API_KEY")
|
| 147 |
+
if not llama_api_key:
|
| 148 |
+
raise ValueError("LlamaCloud API key required")
|
| 149 |
+
|
| 150 |
+
# Advanced parser configuration
|
| 151 |
+
parser = LlamaParse(
|
| 152 |
+
api_key=llama_api_key,
|
| 153 |
+
result_type="markdown",
|
| 154 |
+
verbose=True,
|
| 155 |
+
language="en",
|
| 156 |
+
# Premium mode uses GPT-4o for better accuracy
|
| 157 |
+
premium_mode=premium_mode,
|
| 158 |
+
# Detailed parsing instructions for medical content
|
| 159 |
+
parsing_instruction=(
|
| 160 |
+
"Medical guideline document with complex tables. Instructions:\n"
|
| 161 |
+
"0. Keep the original text intact without changing anything\n"
|
| 162 |
+
"1. Preserve all table structures, especially borderless tables\n"
|
| 163 |
+
"2. Maintain hierarchical organization of sections and subsections\n"
|
| 164 |
+
"3. Keep dosage tables and treatment algorithms intact\n"
|
| 165 |
+
"4. Preserve reference numbers and citations\n"
|
| 166 |
+
"5. Identify and mark clinical recommendation levels\n"
|
| 167 |
+
"6. Extract figures and their captions accurately"
|
| 168 |
+
),
|
| 169 |
+
# Extract structured data
|
| 170 |
+
take_screenshot=True, # Capture page screenshots for reference
|
| 171 |
+
# Table-specific optimizations
|
| 172 |
+
invalidate_cache=False,
|
| 173 |
+
do_not_cache=False,
|
| 174 |
+
fast_mode=False,
|
| 175 |
+
# Critical: split by page for accurate page numbering
|
| 176 |
+
split_by_page=True,
|
| 177 |
+
)
|
| 178 |
+
|
| 179 |
+
file_extractor = {".pdf": parser}
|
| 180 |
+
reader = SimpleDirectoryReader(
|
| 181 |
+
input_files=[str(pdf_path)],
|
| 182 |
+
file_extractor=file_extractor
|
| 183 |
+
)
|
| 184 |
+
|
| 185 |
+
logger.info(f"Parsing PDF with LlamaParse (Premium: {premium_mode}): {pdf_path.name}")
|
| 186 |
+
llama_documents = reader.load_data()
|
| 187 |
+
|
| 188 |
+
documents = []
|
| 189 |
+
for doc_idx, llama_doc in enumerate(llama_documents):
|
| 190 |
+
# Get page number from metadata or use index
|
| 191 |
+
page_num = llama_doc.metadata.get('page_number', doc_idx + 1)
|
| 192 |
+
|
| 193 |
+
# Enhanced metadata
|
| 194 |
+
metadata = {
|
| 195 |
+
"source": pdf_path.name,
|
| 196 |
+
"disease": disease,
|
| 197 |
+
"provider": provider,
|
| 198 |
+
"page_number": page_num,
|
| 199 |
+
"document_index": doc_idx + 1,
|
| 200 |
+
"parser": "llamaparse",
|
| 201 |
+
"premium_mode": premium_mode
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
# Add additional metadata from LlamaIndex document
|
| 205 |
+
if hasattr(llama_doc, 'metadata'):
|
| 206 |
+
# Merge additional metadata, avoiding duplicates
|
| 207 |
+
for key, value in llama_doc.metadata.items():
|
| 208 |
+
if key not in metadata:
|
| 209 |
+
metadata[key] = value
|
| 210 |
+
|
| 211 |
+
processed_doc = Document(
|
| 212 |
+
page_content=llama_doc.text.strip(),
|
| 213 |
+
metadata=metadata
|
| 214 |
+
)
|
| 215 |
+
documents.append(processed_doc)
|
| 216 |
+
|
| 217 |
+
logger.info(f"Loaded {len(documents)} pages from PDF: {pdf_path.name}")
|
| 218 |
+
return documents
|
| 219 |
+
|
| 220 |
+
except Exception as e:
|
| 221 |
+
logger.error(f"Error loading PDF documents from {pdf_path}: {str(e)}")
|
| 222 |
+
raise
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
# Batch processing function for multiple PDFs
|
| 226 |
+
def load_multiple_pdfs(
    pdf_directory: Path,
    api_key: str = None,
    file_pattern: str = "*.pdf"
) -> List[Document]:
    """
    Load multiple PDF files from a directory.

    Every path matching ``file_pattern`` is passed to ``load_pdf_documents``.
    A file that raises while loading is logged and skipped, so one bad PDF
    does not abort the rest of the batch.

    Args:
        pdf_directory: Directory containing PDF files
        api_key: LlamaCloud API key, forwarded unchanged to ``load_pdf_documents``
        file_pattern: Glob pattern for PDF files (default: "*.pdf")

    Returns:
        List of all documents from all PDFs that loaded successfully,
        ordered by sorted file path.
    """
    all_documents = []
    # Path.glob yields entries in whatever order the filesystem returns them,
    # which differs between platforms and runs. Sort so the resulting document
    # order is reproducible.
    pdf_files = sorted(pdf_directory.glob(file_pattern))

    logger.info(f"Found {len(pdf_files)} PDF files to process")

    for pdf_path in pdf_files:
        try:
            documents = load_pdf_documents(pdf_path, api_key=api_key)
            all_documents.extend(documents)
            logger.info(f"Successfully processed: {pdf_path.name}")
        except Exception as e:
            # Deliberate best-effort: record the failure and move on to the
            # next file instead of failing the whole batch.
            logger.error(f"Failed to process {pdf_path.name}: {str(e)}")
            continue

    logger.info(f"Total documents loaded: {len(all_documents)}")
    return all_documents
|
| 258 |
+
|
| 259 |
+
|
| 260 |
def load_markdown_documents(md_path: Path) -> List[Document]:
|
| 261 |
"""
|
| 262 |
Load and process Markdown medical guidelines.
|
|
|
|
| 310 |
|
| 311 |
except Exception as e:
|
| 312 |
logger.error(f"Error loading Markdown document from {md_path}: {str(e)}")
|
| 313 |
+
raise
|
| 314 |
+
|
core/hbv_assessment.py
CHANGED
|
@@ -113,110 +113,86 @@ def normalize_recommendations(text: str) -> str:
|
|
| 113 |
|
| 114 |
# SASLT 2021 Guidelines - Hardcoded Page Contents
|
| 115 |
SASLT_GUIDELINES = """
|
| 116 |
-
===== TREATMENT
|
| 117 |
-
|
| 118 |
|
| 119 |
### 1. INITIATION OF TREATMENT [SASLT 2021, p. 6]
|
| 120 |
|
| 121 |
-
• Treatment indications should also take into account patient's age, health status, risk of HBV transmission, family history of HCC or cirrhosis and extrahepatic manifestations
|
| 122 |
-
|
| 123 |
-
•
|
| 124 |
-
|
| 125 |
-
• Patients with
|
| 126 |
-
|
| 127 |
-
• HBV Eligibility Criteria:
|
| 128 |
-
•Patients with HBV DNA > 20,000 IU/mL and ALT > 2xULN, regardless of the degree of fibrosis (Grade B)
|
| 129 |
-
|
| 130 |
-
• Patients with HBeAg-positive chronic HBV infection (persistently normal ALT and high HBV DNA levels) may be treated if they are > 30 years, regardless of the severity of liver histological lesions (Grade D)
|
| 131 |
-
|
| 132 |
-
• Patients with chronic HBV infection (HBV DNA > 2,000 IU/mL, ALT > ULN), regardless of HBeAg status, and a family history of HCC or cirrhosis and extrahepatic manifestations (Grade D)
|
| 133 |
|
| 134 |
|
| 135 |
### 2. MANAGEMENT ALGORITHM [SASLT 2021, p. 6]
|
| 136 |
|
| 137 |
-
• HBsAg positive with chronic HBV infection and no signs of chronic hepatitis → Monitor (HBsAg, HBeAg, HBV DNA, ALT, fibrosis assessment). Consider: risk of HCC, risk of HBV reactivation, extrahepatic manifestations, risk of HBV transmission
|
| 138 |
-
|
| 139 |
-
•
|
| 140 |
-
|
| 141 |
-
• HBsAg negative, anti-HBc positive → No specialist follow-up (inform about HBV reactivation risk). In case of immunosuppression: start oral antiviral prophylaxis or monitor
|
| 142 |
|
| 143 |
|
| 144 |
### 3. MONITORING OF UNTREATED PATIENTS [SASLT 2021, p. 6-7]
|
| 145 |
|
| 146 |
-
• Patients with HBeAg-positive chronic HBV infection who are younger than 30 years should be followed at least every 3-6 months (Grade B)
|
| 147 |
-
|
| 148 |
-
• Patients with HBeAg-negative chronic HBV infection and serum HBV DNA
|
| 149 |
-
|
| 150 |
-
• Patients with HBeAg-negative chronic HBV infection and serum HBV DNA ≥2,000 IU/ml should be followed every 3 months for the first year and thereafter every 6 months (Grade D)
|
| 151 |
|
| 152 |
|
| 153 |
### 4. CHRONIC HEPATITIS B (CHB) TREATMENT [SASLT 2021, p. 7-8]
|
| 154 |
|
| 155 |
-
• The treatment of choice is the long-term administration of a potent nucleos(t)ide analogue NA with a high barrier to resistance, regardless of the severity of liver disease (Grade A)
|
| 156 |
-
|
| 157 |
-
•
|
| 158 |
-
|
| 159 |
-
• LAM, ADV and TBV are not recommended in the treatment of CHB (Grade A)
|
| 160 |
|
| 161 |
|
| 162 |
### 5. HBV-HCV COINFECTION [SASLT 2021, p. 8-9]
|
| 163 |
|
| 164 |
-
• Treatment of HCV through DAAs may lead to reactivation of HBV. Patients who meet the criteria for HBV treatment should be treated concurrently or before initiation of DAA (Grade A)
|
| 165 |
-
|
| 166 |
-
•
|
| 167 |
-
|
| 168 |
-
• ALT level should be monitored every four weeks while on DAA for patients who are HBsAg-negative but HBcAb-positive. If ALT starts to rise, HBsAg and HBV DNA must be obtained to determine the need to start HBV treatment (Grade D)
|
| 169 |
|
| 170 |
|
| 171 |
### 6. HBV-HDV COINFECTION [SASLT 2021, p. 9]
|
| 172 |
|
| 173 |
-
• HDV is a defective virus that requires HBsAg to envelop its delta antigen, causing coinfection with HBV or superinfection in chronic HBV patients
|
| 174 |
-
|
| 175 |
-
•
|
| 176 |
-
|
| 177 |
-
•
|
| 178 |
-
|
| 179 |
-
• PEG-IFN for 1 year shows long-term benefits despite post-treatment viral relapse
|
| 180 |
-
|
| 181 |
-
• NA monotherapy is ineffective against HDV replication
|
| 182 |
|
| 183 |
|
| 184 |
### 7. HBV-HIV COINFECTION [SASLT 2021, p. 9]
|
| 185 |
|
| 186 |
-
• All HIV-positive patients with HBV co-infection should start ART irrespective of CD4 cell count (Grade A)
|
| 187 |
-
|
| 188 |
-
• HBV-HIV co-infected patients should be treated with TDF- or TAF-based ART regimen (Grade A)
|
| 189 |
|
| 190 |
|
| 191 |
### 8. IMMUNOCOMPROMISED PATIENTS [SASLT 2021, p. 9]
|
| 192 |
|
| 193 |
-
• Prophylaxis for all HBsAg-positive patients before chemotherapy or immunosuppressive therapy (Grade A)
|
| 194 |
-
|
| 195 |
-
•
|
| 196 |
-
|
| 197 |
-
•
|
|
|
|
|
|
|
|
|
|
| 198 |
|
| 199 |
|
| 200 |
### 9. PREGNANCY [SASLT 2021, p. 9-10]
|
| 201 |
|
| 202 |
-
• Screen all pregnant women for HBV in first trimester (Grade A)
|
| 203 |
-
|
| 204 |
-
•
|
| 205 |
-
|
| 206 |
-
•
|
| 207 |
-
|
| 208 |
-
•
|
| 209 |
-
|
| 210 |
-
• Switch to TDF/TAF if on ETV, ADV, or interferon during pregnancy (Grade D)
|
| 211 |
-
|
| 212 |
-
• Delivery mode based on obstetric indications only
|
| 213 |
-
|
| 214 |
-
• Breastfeeding permitted for HBsAg+ women on TDF (Grade B)
|
| 215 |
-
|
| 216 |
-
----
|
| 217 |
"""
|
| 218 |
|
| 219 |
|
|
|
|
| 220 |
def assess_hbv_eligibility(patient_data: Dict[str, Any]) -> Dict[str, Any]:
|
| 221 |
"""
|
| 222 |
Assess patient eligibility for HBV treatment based on SASLT 2021 guidelines
|
|
@@ -231,12 +207,12 @@ def assess_hbv_eligibility(patient_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 231 |
- recommendations: str (comprehensive narrative with inline citations in format [SASLT 2021, Page X])
|
| 232 |
"""
|
| 233 |
try:
|
| 234 |
-
# Check if HBsAg is positive (required for treatment consideration)
|
| 235 |
-
if patient_data.get("hbsag_status") != "Positive":
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
|
| 241 |
# Use hardcoded SASLT 2021 guidelines instead of RAG retrieval
|
| 242 |
logger.info("Using hardcoded SASLT 2021 guidelines (Pages 3, 4, 6, 7, 8, 9, 10)")
|
|
@@ -262,7 +238,7 @@ def assess_hbv_eligibility(patient_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 262 |
comorbidities = patient_data.get("other_comorbidities", [])
|
| 263 |
|
| 264 |
# Create prompt for LLM to analyze patient against guidelines
|
| 265 |
-
analysis_prompt = f"""You are an HBV treatment eligibility assessment system. Analyze the patient data against SASLT 2021 guidelines.
|
| 266 |
PATIENT DATA:
|
| 267 |
- Sex: {sex}
|
| 268 |
- Age: {age} years
|
|
@@ -290,19 +266,30 @@ You MUST respond with a valid JSON object in this exact format:
|
|
| 290 |
"recommendations": "Comprehensive assessment with inline citations"
|
| 291 |
}}
|
| 292 |
IMPORTANT JSON FORMATTING:
|
| 293 |
-
- Return ONLY valid JSON without markdown code blocks
|
| 294 |
- You MUST use "\\n" to indicate line breaks inside the "recommendations" string and format the content as clear bullet lists prefixed with "- ".
|
| 295 |
- Do NOT include literal newline characters. Use \\n for every new bullet or line.
|
| 296 |
- Use SINGLE \\n between lines. Do NOT use \\n\\n (double newlines) anywhere.
|
| 297 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
STRUCTURE AND CONTENT OF "recommendations" (CONCISE & ORGANIZED):
|
| 299 |
- Use ONLY these sections in this exact order, each as a header followed by 1-3 concise bullets:
|
| 300 |
-
1. "Eligibility and Rationale:" (1-2 bullets max)
|
| 301 |
-
2. "Treatment Recommendations:" (1-3 bullets: first-line drugs if eligible, or "
|
| 302 |
3. "Monitoring and Follow-up:" (1-2 bullets)
|
| 303 |
-
4. "Special Considerations:" (0-2 bullets, ONLY if applicable
|
| 304 |
5. "References:" (1 line listing pages cited)
|
| 305 |
-
|
| 306 |
- OMIT "Additional Notes" and any other sections.
|
| 307 |
- Keep each bullet to ONE sentence (max 25 words per bullet).
|
| 308 |
- Total output: aim for 8-12 bullets maximum across all sections.
|
|
@@ -313,31 +300,49 @@ BULLETING AND CITATIONS RULES:
|
|
| 313 |
- Only cite pages 6–10 that actually contain the information.
|
| 314 |
|
| 315 |
STRICT ACCURACY AND CONSISTENCY RULES:
|
| 316 |
-
-
|
| 317 |
-
-
|
| 318 |
-
-
|
| 319 |
-
-
|
| 320 |
-
- BREVITY: Each bullet = 1 sentence, max 25 words. Total = 8-12 bullets max.
|
| 321 |
|
| 322 |
PAGE-TO-TOPIC MAPPING GUIDANCE (for correct citations):
|
| 323 |
-
- Page 6:
|
| 324 |
-
- Page 7: Monitoring of untreated patients
|
| 325 |
- Page 8: Treatment drugs/regimens (ETV, TDF, TAF), agents not recommended.
|
| 326 |
-
- Page 9: Special populations (HBV-HCV, HBV-HDV, HBV-HIV,
|
| 327 |
-
- Page 10: Pregnancy-related recommendations
|
| 328 |
-
|
| 329 |
-
EXAMPLE OUTPUT (ELIGIBLE PATIENT) - Use SINGLE \\n only:
|
| 330 |
-
Eligibility and Rationale:\n- Eligible: HBV DNA > 2,000 IU/mL, ALT > ULN, moderate fibrosis (Grade A) [SASLT 2021, Page 6]\nTreatment Recommendations:\n- Start monotherapy with ETV, TDF, or TAF (Grade A) [SASLT 2021, Page 8]\nMonitoring and Follow-up:\n- Monitor treatment response per SASLT protocol [SASLT 2021, Page 7]\nSpecial Considerations:\n- HBV-HIV coinfection: use TDF- or TAF-based ART (Grade A) [SASLT 2021, Page 9]\nReferences:\n- Pages 6, 7, 8, 9: Treatment criteria, drugs, monitoring, HIV coinfection
|
| 331 |
-
|
| 332 |
-
EXAMPLE OUTPUT (NOT ELIGIBLE PATIENT) - Use SINGLE \\n only:
|
| 333 |
-
Eligibility and Rationale:\n- Not eligible: HBV DNA < 2,000 IU/mL, ALT ≤ ULN, no significant fibrosis [SASLT 2021, Page 6]\nTreatment Recommendations:\n- Treatment not indicated at this time [SASLT 2021, Page 6]\nMonitoring and Follow-up:\n- Monitor every 6-12 months (HBeAg-negative, HBV DNA < 2,000 IU/mL) (Grade B) [SASLT 2021, Page 7]\nReferences:\n- Pages 6, 7: Treatment criteria, monitoring protocols
|
| 334 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
CRITICAL REQUIREMENTS:
|
| 336 |
1. Base assessment ONLY on SASLT 2021 guidelines provided.
|
| 337 |
-
2.
|
| 338 |
-
3.
|
| 339 |
-
4.
|
| 340 |
-
5.
|
| 341 |
6. Return ONLY valid JSON, no markdown, no extra text.
|
| 342 |
"""
|
| 343 |
|
|
|
|
| 113 |
|
| 114 |
# SASLT 2021 Guidelines - Hardcoded Page Contents
|
| 115 |
SASLT_GUIDELINES = """
|
| 116 |
+
===== SASLT 2021 GUIDELINES: TREATMENT & MANAGEMENT =====
|
|
|
|
| 117 |
|
| 118 |
### 1. INITIATION OF TREATMENT [SASLT 2021, p. 6]
|
| 119 |
|
| 120 |
+
• Treatment indications should also take into account patient's age, health status, risk of HBV transmission, family history of HCC or cirrhosis and extrahepatic manifestations [SASLT 2021, p. 6]
|
| 121 |
+
• All patients with chronic hepatitis B (HBV DNA > 2,000 IU/mL, ALT > ULN), regardless of HBeAg status, and/or at least moderate liver necroinflammation or fibrosis (Grade A) [SASLT 2021, p. 6]
|
| 122 |
+
• Patients with cirrhosis (compensated or decompensated), with any detectable HBV DNA level and regardless of ALT levels (Grade A) [SASLT 2021, p. 6]
|
| 123 |
+
• Patients with HBV DNA > 20,000 IU/mL and ALT > 2xULN, regardless of the degree of fibrosis (Grade B) [SASLT 2021, p. 6]
|
| 124 |
+
• Patients with HBeAg-positive chronic HBV infection (persistently normal ALT and high HBV DNA levels) may be treated if they are > 30 years, regardless of the severity of liver histological lesions (Grade D) [SASLT 2021, p. 6]
|
| 125 |
+
• Patients with chronic HBV infection (HBV DNA > 2,000 IU/mL, ALT > ULN), regardless of HBeAg status, and a family history of HCC or cirrhosis and extrahepatic manifestations (Grade D) [SASLT 2021, p. 6]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
|
| 128 |
### 2. MANAGEMENT ALGORITHM [SASLT 2021, p. 6]
|
| 129 |
|
| 130 |
+
• HBsAg positive with chronic HBV infection and no signs of chronic hepatitis → Monitor (HBsAg, HBeAg, HBV DNA, ALT, fibrosis assessment). Consider: risk of HCC, risk of HBV reactivation, extrahepatic manifestations, risk of HBV transmission [SASLT 2021, p. 6]
|
| 131 |
+
• CHB (with/without cirrhosis) → Start antiviral treatment if indicated, otherwise return to monitoring [SASLT 2021, p. 6]
|
| 132 |
+
• HBsAg negative, anti-HBc positive → No specialist follow-up (inform about HBV reactivation risk). In case of immunosuppression: start oral antiviral prophylaxis or monitor [SASLT 2021, p. 6]
|
|
|
|
|
|
|
| 133 |
|
| 134 |
|
| 135 |
### 3. MONITORING OF UNTREATED PATIENTS [SASLT 2021, p. 6-7]
|
| 136 |
|
| 137 |
+
• Patients with HBeAg-positive chronic HBV infection who are younger than 30 years should be followed at least every 3-6 months (Grade B) [SASLT 2021, p. 7]
|
| 138 |
+
• Patients with HBeAg-negative chronic HBV infection and serum HBV DNA <2,000 IU/ml should be followed every 6-12 months (Grade B) [SASLT 2021, p. 7]
|
| 139 |
+
• Patients with HBeAg-negative chronic HBV infection and serum HBV DNA ≥2,000 IU/ml should be followed every 3 months for the first year and thereafter every 6 months (Grade D) [SASLT 2021, p. 7]
|
|
|
|
|
|
|
| 140 |
|
| 141 |
|
| 142 |
### 4. CHRONIC HEPATITIS B (CHB) TREATMENT [SASLT 2021, p. 7-8]
|
| 143 |
|
| 144 |
+
• The treatment of choice is the long-term administration of a potent nucleos(t)ide analogue NA with a high barrier to resistance, regardless of the severity of liver disease (Grade A) [SASLT 2021, p. 8]
|
| 145 |
+
• Preferred regimens are ETV, TDF and TAF as monotherapies (Grade A) [SASLT 2021, p. 8]
|
| 146 |
+
• LAM, ADV and TBV are not recommended in the treatment of CHB (Grade A) [SASLT 2021, p. 8]
|
|
|
|
|
|
|
| 147 |
|
| 148 |
|
| 149 |
### 5. HBV-HCV COINFECTION [SASLT 2021, p. 8-9]
|
| 150 |
|
| 151 |
+
• Treatment of HCV through DAAs may lead to reactivation of HBV. Patients who meet the criteria for HBV treatment should be treated concurrently or before initiation of DAA (Grade A) [SASLT 2021, p. 9]
|
| 152 |
+
• HBV DNA and ALT should be monitored every four to eight weeks while on DAA and three months after completion of therapy (Grade D) [SASLT 2021, p. 9]
|
| 153 |
+
• ALT level should be monitored every four weeks while on DAA for patients who are HBsAg-negative but HBcAb-positive. If ALT starts to rise, HBsAg and HBV DNA must be obtained to determine the need to start HBV treatment (Grade D) [SASLT 2021, p. 9]
|
|
|
|
|
|
|
| 154 |
|
| 155 |
|
| 156 |
### 6. HBV-HDV COINFECTION [SASLT 2021, p. 9]
|
| 157 |
|
| 158 |
+
• HDV is a defective virus that requires HBsAg to envelop its delta antigen, causing coinfection with HBV or superinfection in chronic HBV patients [SASLT 2021, p. 9]
|
| 159 |
+
• Active HDV infection is defined by HDV IgM and RNA presence with unexplained LFT elevation [SASLT 2021, p. 9]
|
| 160 |
+
• Treatment goal: Suppression of HDV replication [SASLT 2021, p. 9]
|
| 161 |
+
• PEG-IFN for 1 year shows long-term benefits despite post-treatment viral relapse [SASLT 2021, p. 9]
|
| 162 |
+
• NA monotherapy is ineffective against HDV replication [SASLT 2021, p. 9]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
|
| 165 |
### 7. HBV-HIV COINFECTION [SASLT 2021, p. 9]
|
| 166 |
|
| 167 |
+
• All HIV-positive patients with HBV co-infection should start ART irrespective of CD4 cell count (Grade A) [SASLT 2021, p. 9]
|
| 168 |
+
• HBV-HIV co-infected patients should be treated with TDF- or TAF-based ART regimen (Grade A) [SASLT 2021, p. 9]
|
|
|
|
| 169 |
|
| 170 |
|
| 171 |
### 8. IMMUNOCOMPROMISED PATIENTS [SASLT 2021, p. 9]
|
| 172 |
|
| 173 |
+
• Prophylaxis for all HBsAg-positive patients before chemotherapy or immunosuppressive therapy (Grade A) [SASLT 2021, p. 9]
|
| 174 |
+
• HBsAg-negative/anti-HBc-positive patients need HBV prophylaxis if receiving anti-CD20 or stem cell transplantation [SASLT 2021, p. 9]
|
| 175 |
+
• Continue prophylaxis for ≥6 months after immunosuppression (12 months for anti-CD20) [SASLT 2021, p. 9]
|
| 176 |
+
• All patients undergoing immunosuppressive treatment or chemotherapy, even short‑term courses, should be screened for HBsAg, anti‑HBc, and anti‑HBs (and HBV DNA, if HBsAg is already positive). [SASLT 2021, p. 9]
|
| 177 |
+
• We recommend prophylaxis for all patients with positive HBsAg before initiating chemotherapy or other immunosuppressive agents. [SASLT 2021, p. 9]
|
| 178 |
+
• For HBsAg-negative and anti-HBc positive patients, we recommend HBV prophylaxis if they are candidates for anti CD20 or are undergoing stem cell transplantation. [SASLT 2021, p. 9]
|
| 179 |
+
• We recommend starting HBV prophylaxis for HBsAg or anti‑HBc positive patients undergoing treatment with tumor necrosis factor (TNF) inhibitors. [SASLT 2021, p. 9]
|
| 180 |
+
• We recommend HBV prophylaxis for all patients who are HBsAg or anti-HBc positive before initiation of immunotherapy such as anti‑programmed cell death (PD) ‑1 and anti‑programmed cell death‑ligand 1 (PD‑L1) therapy. [SASLT 2021, p. 9]
|
| 181 |
|
| 182 |
|
| 183 |
### 9. PREGNANCY [SASLT 2021, p. 9-10]
|
| 184 |
|
| 185 |
+
• Screen all pregnant women for HBV in first trimester (Grade A) [SASLT 2021, p. 9]
|
| 186 |
+
• HBV vaccine is safe in pregnancy for non-immune women without chronic HBV. [SASLT 2021, p. 9]
|
| 187 |
+
• Treat pregnant women meeting standard therapy indications [SASLT 2021, p. 9]
|
| 188 |
+
• Start antiviral prophylaxis with TDF (or TAF) for HBV DNA >100,000 IU/mL at 24-28 weeks (Grade D) [SASLT 2021, p. 10]
|
| 189 |
+
• Switch to TDF/TAF if on ETV, ADV, or interferon during pregnancy (Grade D) [SASLT 2021, p. 10]
|
| 190 |
+
• Delivery mode based on obstetric indications only [SASLT 2021, p. 10]
|
| 191 |
+
• Breastfeeding permitted for HBsAg+ women on TDF (Grade B) [SASLT 2021, p. 10]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
"""
|
| 193 |
|
| 194 |
|
| 195 |
+
|
| 196 |
def assess_hbv_eligibility(patient_data: Dict[str, Any]) -> Dict[str, Any]:
|
| 197 |
"""
|
| 198 |
Assess patient eligibility for HBV treatment based on SASLT 2021 guidelines
|
|
|
|
| 207 |
- recommendations: str (comprehensive narrative with inline citations in format [SASLT 2021, Page X])
|
| 208 |
"""
|
| 209 |
try:
|
| 210 |
+
# # Check if HBsAg is positive (required for treatment consideration)
|
| 211 |
+
# if patient_data.get("hbsag_status") != "Positive":
|
| 212 |
+
# return {
|
| 213 |
+
# "eligible": False,
|
| 214 |
+
# "recommendations": "Patient is HBsAg negative. HBV treatment is not indicated. HBsAg positivity is required for HBV treatment consideration according to SASLT 2021 guidelines."
|
| 215 |
+
# }
|
| 216 |
|
| 217 |
# Use hardcoded SASLT 2021 guidelines instead of RAG retrieval
|
| 218 |
logger.info("Using hardcoded SASLT 2021 guidelines (Pages 3, 4, 6, 7, 8, 9, 10)")
|
|
|
|
| 238 |
comorbidities = patient_data.get("other_comorbidities", [])
|
| 239 |
|
| 240 |
# Create prompt for LLM to analyze patient against guidelines
|
| 241 |
+
analysis_prompt = f"""You are an HBV treatment eligibility assessment system. Analyze the patient data against the SASLT 2021 guidelines.
|
| 242 |
PATIENT DATA:
|
| 243 |
- Sex: {sex}
|
| 244 |
- Age: {age} years
|
|
|
|
| 266 |
"recommendations": "Comprehensive assessment with inline citations"
|
| 267 |
}}
|
| 268 |
IMPORTANT JSON FORMATTING:
|
| 269 |
+
- Return ONLY valid JSON without markdown code blocks.
|
| 270 |
- You MUST use "\\n" to indicate line breaks inside the "recommendations" string and format the content as clear bullet lists prefixed with "- ".
|
| 271 |
- Do NOT include literal newline characters. Use \\n for every new bullet or line.
|
| 272 |
- Use SINGLE \\n between lines. Do NOT use \\n\\n (double newlines) anywhere.
|
| 273 |
|
| 274 |
+
CRITICAL ELIGIBILITY HIERARCHY:
|
| 275 |
+
1. **Check Special Populations FIRST (Pages 9-10):** "Eligible" means eligible for ANY antiviral intervention (treatment OR prophylaxis).
|
| 276 |
+
* If patient is **HBsAg-positive** AND **Immunosuppressed** (Page 9): Set **"eligible": true"** (for prophylaxis).
|
| 277 |
+
* If patient has **HBV-HIV coinfection** (Page 9): Set **"eligible": true"** (for ART).
|
| 278 |
+
* If patient is **Pregnant** with **HBV DNA > 100,000 IU/mL** (Page 10): Set **"eligible": true"** (for prophylaxis).
|
| 279 |
+
* If patient has **HBV-HCV coinfection** AND meets HBV treatment criteria (Page 9): Set **"eligible": true"**.
|
| 280 |
+
2. **Check Standard Criteria SECOND (Page 6):**
|
| 281 |
+
* If *not* eligible based on special populations, check standard criteria (HBV DNA, ALT, fibrosis, age, family history).
|
| 282 |
+
* If any Page 6 criteria are met (e.g., Cirrhosis, or HBV DNA > 2,000 + ALT > ULN + moderate fibrosis, etc.): Set **"eligible": true"**.
|
| 283 |
+
3. **If NO criteria from (1) or (2) are met:** Set **"eligible": false"**.
|
| 284 |
+
4. The "eligible" flag MUST be consistent with the "Eligibility and Rationale" bullet. Do not contradict yourself.
|
| 285 |
+
|
| 286 |
STRUCTURE AND CONTENT OF "recommendations" (CONCISE & ORGANIZED):
|
| 287 |
- Use ONLY these sections in this exact order, each as a header followed by 1-3 concise bullets:
|
| 288 |
+
1. "Eligibility and Rationale:" (1-2 bullets max, MUST state the primary reason for eligibility/ineligibility)
|
| 289 |
+
2. "Treatment Recommendations:" (1-3 bullets: first-line drugs if eligible, or "Treatment not indicated" if not eligible)
|
| 290 |
3. "Monitoring and Follow-up:" (1-2 bullets)
|
| 291 |
+
4. "Special Considerations:" (0-2 bullets, ONLY if applicable and *not* the primary reason for eligibility)
|
| 292 |
5. "References:" (1 line listing pages cited)
|
|
|
|
| 293 |
- OMIT "Additional Notes" and any other sections.
|
| 294 |
- Keep each bullet to ONE sentence (max 25 words per bullet).
|
| 295 |
- Total output: aim for 8-12 bullets maximum across all sections.
|
|
|
|
| 300 |
- Only cite pages 6–10 that actually contain the information.
|
| 301 |
|
| 302 |
STRICT ACCURACY AND CONSISTENCY RULES:
|
| 303 |
+
- **NO CONTRADICTIONS:** The "eligible" flag MUST match the rationale. Follow the CRITICAL ELIGIBILITY HIERARCHY.
|
| 304 |
+
- **Rationale First:** The Rationale MUST state the primary reason. If eligible due to immunosuppression, state that, even if Page 6 criteria are not met.
|
| 305 |
+
- **Use ONLY the provided SASLT 2021 content;** do NOT add external knowledge.
|
| 306 |
+
- **BREVITY:** Each bullet = 1 sentence, max 25 words. Total = 8-12 bullets max.
|
|
|
|
| 307 |
|
| 308 |
PAGE-TO-TOPIC MAPPING GUIDANCE (for correct citations):
|
| 309 |
+
- Page 6: Standard initiation of treatment criteria, management algorithm.
|
| 310 |
+
- Page 7: Monitoring of untreated patients, CHB treatment principles.
|
| 311 |
- Page 8: Treatment drugs/regimens (ETV, TDF, TAF), agents not recommended.
|
| 312 |
+
- Page 9: Special populations (HBV-HCV, HBV-HDV, HBV-HIV, Immunocompromised).
|
| 313 |
+
- Page 10: Pregnancy-related recommendations.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
|
| 315 |
+
---
|
| 316 |
+
EXAMPLE OUTPUT (ELIGIBLE - STANDARD CRITERIA) - Use SINGLE \\n only:
|
| 317 |
+
{{
|
| 318 |
+
"eligible": true,
|
| 319 |
+
"recommendations": "Eligibility and Rationale:\\n- Eligible: HBV DNA > 2,000 IU/mL, ALT > ULN, moderate fibrosis (Grade A) [SASLT 2021, Page 6]\\nTreatment Recommendations:\\n- Start monotherapy with ETV, TDF, or TAF (Grade A) [SASLT 2021, Page 8]\\nMonitoring and Follow-up:\\n- Monitor treatment response per SASLT protocol [SASLT 2021, Page 7]\\nReferences:\\n- Pages 6, 7, 8: Treatment criteria, drugs, monitoring"
|
| 320 |
+
}}
|
| 321 |
+
---
|
| 322 |
+
EXAMPLE OUTPUT (NOT ELIGIBLE - STANDARD CRITERIA) - Use SINGLE \\n only:
|
| 323 |
+
{{
|
| 324 |
+
"eligible": false,
|
| 325 |
+
"recommendations": "Eligibility and Rationale:\\n- Not eligible: HBV DNA < 2,000 IU/mL, ALT ≤ ULN, no significant fibrosis [SASLT 2021, Page 6]\\nTreatment Recommendations:\\n- Treatment not indicated at this time [SASLT 2021, Page 6]\\nMonitoring and Follow-up:\\n- Monitor every 6-12 months (HBeAg-negative, HBV DNA < 2,000 IU/mL) (Grade B) [SASLT 2021, Page 7]\\nReferences:\\n- Pages 6, 7: Treatment criteria, monitoring protocols"
|
| 326 |
+
}}
|
| 327 |
+
---
|
| 328 |
+
EXAMPLE OUTPUT (ELIGIBLE - IMMUNOSUPPRESSION) - Use SINGLE \\n only:
|
| 329 |
+
{{
|
| 330 |
+
"eligible": true,
|
| 331 |
+
"recommendations": "Eligibility and Rationale:\\n- Eligible: HBsAg-positive patient requires prophylaxis for immunosuppressive therapy (Grade A) [SASLT 2021, Page 9]\\nTreatment Recommendations:\\n- Start antiviral prophylaxis (e.g., ETV, TDF, TAF) [SASLT 2021, Page 8, 9]\\nMonitoring and Follow-up:\\n- Continue prophylaxis for ≥6 months after immunosuppression (12 for anti-CD20) [SASLT 2021, Page 9]\\nReferences:\\n- Pages 8, 9: Prophylaxis criteria, drug options, monitoring"
|
| 332 |
+
}}
|
| 333 |
+
---
|
| 334 |
+
EXAMPLE OUTPUT (ELIGIBLE - HIV COINFECTION) - Use SINGLE \\n only:
|
| 335 |
+
{{
|
| 336 |
+
"eligible": true,
|
| 337 |
+
"recommendations": "Eligibility and Rationale:\\n- Eligible: Patient has HBV-HIV coinfection and should start ART (Grade A) [SASLT 2021, Page 9]\\nTreatment Recommendations:\\n- ART regimen must include TDF- or TAF-based therapy (Grade A) [SASLT 2021, Page 9]\\nMonitoring and Follow-up:\\n- Monitor patient closely after ART initiation for immune reconstitution [SASLT 2021, Page 9]\\nReferences:\\n- Page 9: HBV-HIV coinfection management, ART regimens"
|
| 338 |
+
}}
|
| 339 |
+
---
|
| 340 |
CRITICAL REQUIREMENTS:
|
| 341 |
1. Base assessment ONLY on SASLT 2021 guidelines provided.
|
| 342 |
+
2. Follow the CRITICAL ELIGIBILITY HIERARCHY to avoid contradictions.
|
| 343 |
+
3. Keep output SHORT: 8-12 bullets total, 1 sentence per bullet (max 25 words).
|
| 344 |
+
4. Use ONLY the 5 sections listed above.
|
| 345 |
+
5. Cite exact page at end of each bullet: [SASLT 2021, Page X].
|
| 346 |
6. Return ONLY valid JSON, no markdown, no extra text.
|
| 347 |
"""
|
| 348 |
|
core/utils.py
CHANGED
|
@@ -9,7 +9,7 @@ from typing import List, Optional, Iterable
|
|
| 9 |
from langchain.schema import Document
|
| 10 |
from langchain_community.vectorstores import FAISS
|
| 11 |
|
| 12 |
-
from .config import get_embedding_model, VECTOR_STORE_DIR, CHUNKS_PATH, NEW_DATA, PROCESSED_DATA
|
| 13 |
from .text_processors import markdown_splitter, recursive_splitter
|
| 14 |
from . import data_loaders
|
| 15 |
|
|
@@ -126,7 +126,15 @@ def _load_documents_for_file(file_path: Path) -> List[Document]:
|
|
| 126 |
"""
|
| 127 |
try:
|
| 128 |
if file_path.suffix.lower() == '.pdf':
|
| 129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
return data_loaders.load_markdown_documents(file_path)
|
| 131 |
except Exception as e:
|
| 132 |
logger.error(f"Failed to load {file_path}: {e}")
|
|
|
|
| 9 |
from langchain.schema import Document
|
| 10 |
from langchain_community.vectorstores import FAISS
|
| 11 |
|
| 12 |
+
from .config import get_embedding_model, VECTOR_STORE_DIR, CHUNKS_PATH, NEW_DATA, PROCESSED_DATA, settings
|
| 13 |
from .text_processors import markdown_splitter, recursive_splitter
|
| 14 |
from . import data_loaders
|
| 15 |
|
|
|
|
| 126 |
"""
|
| 127 |
try:
|
| 128 |
if file_path.suffix.lower() == '.pdf':
|
| 129 |
+
# Use advanced LlamaParse loader with settings from config
|
| 130 |
+
api_key = settings.LLAMA_CLOUD_API_KEY
|
| 131 |
+
premium_mode = settings.LLAMA_PREMIUM_MODE
|
| 132 |
+
|
| 133 |
+
return data_loaders.load_pdf_documents_advanced(
|
| 134 |
+
file_path,
|
| 135 |
+
api_key=api_key,
|
| 136 |
+
premium_mode=premium_mode
|
| 137 |
+
)
|
| 138 |
return data_loaders.load_markdown_documents(file_path)
|
| 139 |
except Exception as e:
|
| 140 |
logger.error(f"Failed to load {file_path}: {e}")
|
data/HBV_Eligibility_TestCases - To Be Tested(Sheet1).csv
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Case ID,Age,Sex,Pregnancy Status,HBsAg,HBeAg,HBV DNA (IU/mL),ALT (U/L),Fibrosis/Cirrhosis Stage,Necroinflammation,Extrahepatic Manifestations,Immunosuppressive Therapy,Coinfections,Family History of HCC/Cirrhosis,Smoking,Comorbidities,Eligibility,Rationale
|
| 2 |
+
Case1,28,M,No,Positive (36 months),Positive,8500000,120,F1,Moderate,No,None,None,No,No,None,,
|
| 3 |
+
Case2,45,F,No,Positive (60 months),Negative,15000,65,F1–F2,Moderate,No,None,None,No,No,Metabolic syndrome,,
|
| 4 |
+
Case3,52,M,No,Positive (120 months),Negative,3500,32,F2–F3,Mild,No,None,None,No,No,None,,
|
| 5 |
+
Case4,60,M,No,Positive (240 months),Negative,700,28,F4 (compensated),Mild,No,None,None,No,No,None,,
|
| 6 |
+
Case5,22,F,No,Positive (18 months),Positive,25000000,27,F0,None,No,None,None,No,No,None,,
|
| 7 |
+
Case6,40,M,No,Positive (84 months),Negative,900,22,F0–F1,None,No,None,None,No,No,None,,
|
| 8 |
+
Case7,31,F,Yes (28 weeks),Positive (48 months),Positive,500000,35,F1,Mild,No,None,None,No,No,None,,
|
| 9 |
+
Case8,35,M,No,Positive (72 months),Positive,9000000,33,F1,Mild,No,None,None,No,No,None,,
|
| 10 |
+
Case9,55,F,No,Positive (180 months),Negative,6200,45,F1–F2,Moderate,No,None,None,Yes (father HCC),No,None,,
|
| 11 |
+
Case10,63,M,No,Positive (300 months),Negative,50,32,F4 (decompensated),Mild,No,None,None,No,No,None,,
|
| 12 |
+
Case11,68,M,No,Positive (25 years),Negative,5800,41,F2,Mild,No,None,None,No,No,Diabetes,,
|
| 13 |
+
Case12,50,F,No,Positive,Negative,450,32,N/A,Mild,No,Yes (tacrolimus),None,No,No,Post-transplant,,
|
| 14 |
+
Case13,27,M,No,Positive (36 months),Negative,1700,55,F1,Mild,No,None,None,No,No,None,,
|
| 15 |
+
Case14,44,F,No,Positive,Negative,3000,70,F2,Moderate,No,None,HCV RNA positive,No,No,None,,
|
| 16 |
+
Case15,33,M,No,Positive,Negative,25000,30,F1,Mild,No,None,HIV positive,No,No,None,,
|
| 17 |
+
Case16,30,F,Yes (30 weeks),Positive,Positive,40000,28,F0,None,No,None,None,No,No,None,,
|
| 18 |
+
Case17,39,M,No,Positive,Negative,4800,250,F1,Marked,No,None,None,No,No,None,,
|
| 19 |
+
Case18,49,F,No,Positive,Negative,2300,37,F1,Mild,No,None,None,Yes (mother cirrhosis),No,None,,
|
| 20 |
+
Case19,56,M,No,Positive,Negative,10,30,F4 (compensated),Mild,No,None,None,No,No,None,,
|
| 21 |
+
Case20,41,F,No,Positive (10 years),Negative,1900,28,F1,Mild,No,None,None,No,No,None,,
|
| 22 |
+
Case21,56,F,No,Positive,Negative,0,20,F4 (compensated),Mild,No,None,None,No,No,None,,
|
| 23 |
+
Case22,70,M,No,Positive (30 years),Negative,1800,27,F2,Mild,No,None,None,No,Yes (smoker),Hypertension,,
|
| 24 |
+
Case23,33,F,Yes (12 weeks),Positive,Positive,300000,40,F1,Mild,No,None,None,No,No,None,,
|
| 25 |
+
Case24,46,M,No,Positive (8 years),Negative,50000,48,F2,Moderate,Yes (vasculitis),None,None,No,No,None,,
|
| 26 |
+
Case25,58,F,No,Positive,Negative,2000,60,F2,Moderate,No,None,None,No,No,CKD stage 3,,
|
| 27 |
+
Case26,29,M,No,Positive,Positive,2500000,33,F0,None,No,None,None,No,No,None,,
|
| 28 |
+
Case27,54,M,No,Positive,Negative,750,18,F4 (compensated),Mild,No,None,None,No,No,None,,
|
| 29 |
+
Case28,38,F,No,Positive,Negative,6000,80,F2,Moderate,No,None,None,No,No,Obesity,,
|
| 30 |
+
Case29,42,M,No,Positive,Negative,1200,25,F1,Mild,No,None,None,No,No,None,,
|
| 31 |
+
Case30,25,F,No,Positive (2 years),Positive,12000000,95,F1,Moderate,No,None,None,No,No,None,,
|
test_assessment_fixed.py
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import requests
|
| 3 |
+
import json
|
| 4 |
+
import re
|
| 5 |
+
from typing import Optional, List
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def parse_hbsag_duration(hbsag_value: str) -> int:
    """Extract the HBsAg positivity duration, in months, from a status string.

    Recognises a parenthesised duration such as ``"Positive (36 months)"`` or
    ``"Positive (10 years)"``; a value expressed in years is multiplied by 12.
    The unit is matched case-insensitively, so ``"Positive (10 Years)"`` is
    parsed the same way as its lowercase form.

    Args:
        hbsag_value: Raw HBsAg cell value; may be missing (NaN/None).

    Returns:
        The duration in months, or 6 when the value is missing or contains no
        parseable duration (e.g. a bare ``"Positive"``).
    """
    default_months = 6
    if pd.isna(hbsag_value):
        return default_months

    # IGNORECASE matters: without it "(10 Years)" would not match and the
    # duration would silently collapse to the 6-month default.
    match = re.search(
        r'\(\s*(\d+)\s*(months?|years?)\s*\)',
        str(hbsag_value),
        re.IGNORECASE,
    )
    if match is None:
        return default_months

    amount = int(match.group(1))
    unit = match.group(2).lower()
    return amount * 12 if unit.startswith('year') else amount
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def parse_status(value: str) -> str:
    """Normalise a serology cell to exactly ``"Positive"`` or ``"Negative"``.

    A value containing "positive" (case-insensitive) maps to ``"Positive"``.
    Everything else, including missing values and unrecognised text, maps to
    ``"Negative"``.
    """
    if not pd.isna(value) and 'positive' in str(value).lower():
        return "Positive"
    return "Negative"
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def parse_sex(value: str) -> str:
    """Normalise a sex cell to exactly ``"Male"`` or ``"Female"``.

    Accepts ``m``/``male`` and ``f``/``female`` in any letter case, with or
    without surrounding whitespace.

    Args:
        value: Raw sex cell value; may be missing (NaN/None).

    Returns:
        ``"Female"`` for a recognised female marker, otherwise ``"Male"``.
        Missing and unrecognised values fall back to ``"Male"``.
    """
    if pd.isna(value):
        return "Male"

    # Strip first: a padded cell such as " F " must not fall through to the
    # "Male" fallback.
    normalized = str(value).strip().lower()
    if normalized in ('f', 'female'):
        return "Female"
    return "Male"
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def parse_pregnancy_status(sex: str, value: str) -> str:
    """Normalise a pregnancy cell to exactly ``"Pregnant"`` or ``"Not pregnant"``.

    Args:
        sex: Already-normalised sex; ``"Male"`` always yields ``"Not pregnant"``.
        value: Raw pregnancy cell, e.g. ``"No"``, ``"Yes (30 weeks)"``,
            ``"Pregnant"`` or ``"Not pregnant"``; may be missing (NaN/None).

    Returns:
        ``"Pregnant"`` when the cell affirms pregnancy, otherwise
        ``"Not pregnant"`` (also used for missing or unrecognised values).
    """
    if sex == "Male" or pd.isna(value):
        return "Not pregnant"

    normalized = str(value).strip().lower()

    # Explicit negatives must be ruled out before the substring test below:
    # "not pregnant" contains "pregnant" and would otherwise be reported as
    # "Pregnant".
    if re.match(r'(no|not|none|non)\b', normalized):
        return "Not pregnant"

    if 'yes' in normalized or 'pregnant' in normalized:
        return "Pregnant"
    return "Not pregnant"
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def parse_boolean(value: str) -> bool:
    """Interpret a Yes/No style cell as a boolean.

    Returns ``True`` when the text contains "yes" in any letter case (so
    annotated values such as ``"Yes (vasculitis)"`` count), and ``False`` for
    missing values and everything else.
    """
    return (not pd.isna(value)) and 'yes' in str(value).lower()
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def parse_fibrosis_stage(value: str) -> str:
    """Bucket a fibrosis/cirrhosis cell into ``'F0-F1'``, ``'F2-F3'`` or ``'F4'``.

    Matching is case-insensitive and substring based, checked from the most
    advanced stage downwards: "F4" or "cirrhosis" gives ``'F4'``, "F3" or "F2"
    gives ``'F2-F3'``. Missing values, the literal ``"N/A"`` and anything
    unrecognised give ``'F0-F1'``.
    """
    lowest_bucket = "F0-F1"
    if pd.isna(value) or value == "N/A":
        return lowest_bucket

    text = str(value).upper()
    if any(marker in text for marker in ('F4', 'CIRRHOSIS')):
        return "F4"
    if any(marker in text for marker in ('F3', 'F2')):
        return "F2-F3"

    # F0, F1 and unrecognised text all land in the lowest bucket.
    return lowest_bucket
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def parse_necroinflammation(value: str) -> str:
    """Map a necroinflammation cell to an activity grade ``'A0'``..``'A3'``.

    Matching is case-insensitive and substring based, checked from the highest
    grade downwards, so a mixed description resolves to its most severe term.
    Either the grade code itself or a descriptive word is accepted:

    * ``A3``: "A3", "severe", "marked"
    * ``A2``: "A2", "moderate"
    * ``A1``: "A1", "mild"
    * ``A0``: "A0", "minimal"

    Args:
        value: Raw necroinflammation cell; may be missing (NaN/None).

    Returns:
        The matching grade. Missing values and the literal "none" give
        ``'A0'``; text that matches no keyword falls back to ``'A1'``.
    """
    if pd.isna(value) or str(value).lower() == "none":
        return "A0"

    text = str(value).upper()

    # "MARKED" is treated as a synonym for severe activity; without it that
    # wording would fall through to the A1 fallback and be under-graded.
    grade_keywords = (
        ("A3", ("A3", "SEVERE", "MARKED")),
        ("A2", ("A2", "MODERATE")),
        ("A1", ("A1", "MILD")),
        ("A0", ("A0", "MINIMAL")),
    )
    for grade, keywords in grade_keywords:
        if any(keyword in text for keyword in keywords):
            return grade

    return "A1"
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def parse_immunosuppression(value: str) -> str:
    """Classify an immunosuppressive-therapy cell as ``'None'``, ``'Chemotherapy'`` or ``'Other'``.

    Missing values give ``'None'``. Otherwise the text is inspected
    case-insensitively: anything containing "chemo" gives ``'Chemotherapy'``,
    anything containing "none" gives ``'None'``, and all remaining text (for
    example ``"Yes (tacrolimus)"``) gives ``'Other'``.
    """
    if pd.isna(value):
        return "None"

    text = str(value).lower()
    if 'chemo' in text:
        return "Chemotherapy"
    # A bare "none" and longer strings containing it are handled alike.
    return "None" if 'none' in text else "Other"
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def parse_coinfections(value: str) -> List[str]:
    """List the coinfections (HCV, HIV, HDV) named in a cell.

    The text is searched case-insensitively for each virus name. The result
    always uses the fixed order HCV, HIV, HDV regardless of the order in the
    input. Missing values and the literal "none" give an empty list.
    """
    if pd.isna(value):
        return []

    text = str(value)
    if text.lower() == "none":
        return []

    haystack = text.upper()
    return [virus for virus in ("HCV", "HIV", "HDV") if virus in haystack]
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def parse_comorbidities(value: str) -> Optional[List[str]]:
    """Wrap a comorbidity cell in a single-element list.

    Returns ``None`` when the cell is missing or is the literal "none" (any
    letter case); otherwise returns a list holding the cell text unchanged.
    The text is not split into individual conditions.
    """
    if not pd.isna(value):
        text = str(value)
        if text.lower() != "none":
            return [text]
    return None
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def create_api_payload(row: pd.Series) -> dict:
    """Translate one test-case CSV row into the assessment API request body.

    Each field is read from its CSV column and passed through the matching
    ``parse_*`` helper; age is cast to ``int`` and the HBV DNA and ALT values
    to ``float``. The HBsAg column feeds both the status and the duration
    fields.
    """
    sex = parse_sex(row['Sex'])

    # Built key by key in the same order the API fields were originally listed.
    payload = {}
    payload["sex"] = sex
    payload["age"] = int(row['Age'])
    payload["pregnancy_status"] = parse_pregnancy_status(sex, row['Pregnancy Status'])
    payload["hbsag_status"] = parse_status(row['HBsAg'])
    payload["duration_hbsag_months"] = parse_hbsag_duration(row['HBsAg'])
    payload["hbv_dna_level"] = float(row['HBV DNA (IU/mL)'])
    payload["hbeag_status"] = parse_status(row['HBeAg'])
    payload["alt_level"] = float(row['ALT (U/L)'])
    payload["fibrosis_stage"] = parse_fibrosis_stage(row['Fibrosis/Cirrhosis Stage'])
    payload["necroinflammatory_activity"] = parse_necroinflammation(row['Necroinflammation'])
    payload["extrahepatic_manifestations"] = parse_boolean(row['Extrahepatic Manifestations'])
    payload["immunosuppression_status"] = parse_immunosuppression(row['Immunosuppressive Therapy'])
    payload["coinfections"] = parse_coinfections(row['Coinfections'])
    payload["family_history_cirrhosis_hcc"] = parse_boolean(row['Family History of HCC/Cirrhosis'])
    payload["other_comorbidities"] = parse_comorbidities(row['Comorbidities'])
    return payload
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def assess_case(payload: dict, api_url: str) -> dict:
    """POST one case to the assessment API and return the decoded JSON reply.

    Failures never propagate to the caller. They are printed and reported as
    a result dict whose ``"eligible"`` is ``None`` and whose
    ``"recommendations"`` carries the error text.

    Args:
        payload: Request body, sent as JSON.
        api_url: Full URL of the assessment endpoint.

    Returns:
        The decoded JSON response on success, otherwise
        ``{"eligible": None, "recommendations": "Error: ..."}``.
    """
    try:
        response = requests.post(
            api_url,
            json=payload,
            headers={'Content-Type': 'application/json'},
            timeout=30,
        )
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        # Prefer the structured error body the server sent, if there is one.
        # Use the response attached to the exception rather than the outer
        # variable, and catch only the failures expected here: no response
        # attached (AttributeError) or a body that is not JSON (ValueError).
        try:
            error_detail = e.response.json()
            print(f"API Error Details: {json.dumps(error_detail, indent=2)}")
        except (AttributeError, ValueError):
            print(f"API Error: {e}")
        return {"eligible": None, "recommendations": f"Error: {str(e)}"}
    except requests.exceptions.RequestException as e:
        print(f"API Error: {e}")
        return {"eligible": None, "recommendations": f"Error: {str(e)}"}
    except ValueError as e:
        # A 2xx reply whose body is not valid JSON; depending on the requests
        # version the decode error may not derive from RequestException.
        print(f"API Error: {e}")
        return {"eligible": None, "recommendations": f"Error: {str(e)}"}
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def main(
    input_file: str = r"D:\Work\HBV AI Assistant\data\HBV_Eligibility_TestCases - To Be Tested(Sheet1).csv",
    output_file: str = "HBV_Eligibility_Results.csv",
    api_url: str = "https://moazx-hbv-ai-assistant.hf.space/assess",
) -> None:
    """Run every test case in a CSV through the assessment API and save the results.

    Reads the input CSV, builds a payload per row, calls the API, stores the
    returned ``eligible`` and ``recommendations`` values in new
    ``Eligibility`` and ``Rationale`` columns, writes the augmented table to
    ``output_file`` and prints a summary.

    Args:
        input_file: Path of the test-case CSV (read as windows-1252).
        output_file: Path the results CSV is written to.
        api_url: Assessment endpoint that each payload is POSTed to.
    """
    # Read CSV
    print(f"Reading {input_file}...")
    df = pd.read_csv(input_file, encoding='windows-1252')

    # Add columns for results
    df['Eligibility'] = None
    df['Rationale'] = None

    # Process each case
    print(f"\nProcessing {len(df)} cases...")
    for idx, row in df.iterrows():
        case_id = row['Case ID']
        print(f"\nProcessing {case_id}...")

        payload = create_api_payload(row)
        print(f"Payload: {json.dumps(payload, indent=2)}")

        result = assess_case(payload, api_url)
        print(f"Result: {result}")

        df.at[idx, 'Eligibility'] = result.get('eligible')
        df.at[idx, 'Rationale'] = result.get('recommendations', '')

    # Save results
    print(f"\nSaving results to {output_file}...")
    df.to_csv(output_file, index=False)
    print("Done!")

    # Summarise. A failed API call leaves 'eligible' as None; those cases are
    # reported separately so they are not counted as "Not eligible".
    outcomes = df['Eligibility']
    eligible_count = int(outcomes.eq(True).sum())
    not_eligible_count = int(outcomes.eq(False).sum())
    undetermined_count = len(df) - eligible_count - not_eligible_count

    print("\nSummary:")
    print(f"Total cases: {len(df)}")
    print(f"Eligible: {eligible_count}")
    print(f"Not eligible: {not_eligible_count}")
    print(f"Undetermined (API error or non-boolean result): {undetermined_count}")
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
# Run the batch assessment only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|