Spaces:
Running
Running
| """ | |
| Medical Terminology Module for HBV (Hepatitis B Virus) | |
| This module provides intelligent handling of HBV medical linguistic variability including: | |
| - Synonyms and alternate terms | |
| - Abbreviations and acronyms (with context awareness) | |
| - Regional spelling variations (US/UK/International) | |
| - Specialty-specific terminology | |
| - Dynamic learning from corpus | |
| """ | |
| import re | |
| import json | |
| from typing import List, Dict, Set, Tuple, Optional | |
| from collections import defaultdict | |
| from pathlib import Path | |
| from .config import logger | |
| # ============================================================================ | |
| # CORE HBV MEDICAL TERMINOLOGY MAPPINGS | |
| # ============================================================================ | |
# Common HBV medical abbreviations.
# Keys are lower-case abbreviations; each value lists the expansions, with the
# preferred (canonical) expansion first.
MEDICAL_ABBREVIATIONS = {
    # HBV Terminology
    "hbv": ["hepatitis b virus", "hepatitis b"],
    "hbsag": ["hepatitis b surface antigen", "hbs antigen"],
    "hbeag": ["hepatitis b e antigen", "hbe antigen"],
    "hbcag": ["hepatitis b core antigen"],
    "anti-hbs": ["antibody to hepatitis b surface antigen", "anti-hbs antibody"],
    "anti-hbe": ["antibody to hepatitis b e antigen"],
    "anti-hbc": ["antibody to hepatitis b core antigen"],
    "hbv dna": ["hepatitis b virus dna", "hbv viral load"],
    # Liver Disease Terms
    "alt": ["alanine aminotransferase", "alanine transaminase", "sgpt"],
    "ast": ["aspartate aminotransferase", "aspartate transaminase", "sgot"],
    "alp": ["alkaline phosphatase"],
    "ggt": ["gamma-glutamyl transferase", "gamma glutamyl transpeptidase"],
    "inr": ["international normalized ratio"],
    "pt": ["prothrombin time"],
    "apri": ["ast to platelet ratio index"],
    "fib-4": ["fibrosis-4 index"],
    # Fibrosis Staging
    "f0": ["no fibrosis"],
    "f1": ["mild fibrosis", "portal fibrosis"],
    "f2": ["moderate fibrosis"],
    "f3": ["severe fibrosis", "advanced fibrosis"],
    "f4": ["cirrhosis", "liver cirrhosis"],
    # Necroinflammatory Activity
    "a0": ["no activity"],
    "a1": ["mild activity"],
    "a2": ["moderate activity"],
    "a3": ["severe activity"],
    # Treatment Terms
    "etv": ["entecavir"],
    "tdf": ["tenofovir disoproxil fumarate", "tenofovir df"],
    "taf": ["tenofovir alafenamide"],
    "lam": ["lamivudine", "3tc"],
    "adv": ["adefovir", "adefovir dipivoxil"],
    # Telbivudine is abbreviated LdT (or TBV). "LDV" denotes ledipasvir, an
    # HCV drug, so it must not expand to telbivudine.
    "ldt": ["telbivudine"],
    "tbv": ["telbivudine"],
    "peg-ifn": ["pegylated interferon", "peginterferon"],
    "ifn": ["interferon"],
    # Complications
    "hcc": ["hepatocellular carcinoma", "liver cancer"],
    "dc": ["decompensated cirrhosis"],
    "cc": ["compensated cirrhosis"],
    "esld": ["end-stage liver disease"],
    "alf": ["acute liver failure"],
    "aclf": ["acute-on-chronic liver failure"],
    # Coinfections
    "hiv": ["human immunodeficiency virus"],
    "hcv": ["hepatitis c virus", "hepatitis c"],
    "hdv": ["hepatitis d virus", "hepatitis delta"],
    "hav": ["hepatitis a virus", "hepatitis a"],
    # Clinical Terms
    "uln": ["upper limit of normal"],
    "iu/ml": ["international units per milliliter"],
    "log": ["logarithm", "log10"],
    "svr": ["sustained virological response"],
    "vr": ["virological response"],
    "br": ["biochemical response"],
    "sr": ["serological response"],
}
| # Synonym mappings for HBV medical terms | |
| MEDICAL_SYNONYMS = { | |
| # HBV terminology | |
| "hepatitis b": ["hbv", "hepatitis b virus", "hep b", "hbv infection"], | |
| "chronic hepatitis b": ["chb", "chronic hbv", "chronic hbv infection"], | |
| "acute hepatitis b": ["ahb", "acute hbv"], | |
| "hbv dna": ["viral load", "hbv viral load", "serum hbv dna"], | |
| # Serological markers | |
| "hbsag positive": ["hbsag+", "hbs antigen positive"], | |
| "hbeag positive": ["hbeag+", "hbe antigen positive"], | |
| "hbsag negative": ["hbsag-", "hbs antigen negative"], | |
| "hbeag negative": ["hbeag-", "hbe antigen negative"], | |
| # Liver disease stages | |
| "cirrhosis": ["f4", "liver cirrhosis", "hepatic cirrhosis"], | |
| "fibrosis": ["liver fibrosis", "hepatic fibrosis"], | |
| "compensated cirrhosis": ["cc", "child-pugh a", "child-pugh b"], | |
| "decompensated cirrhosis": ["dc", "child-pugh c"], | |
| # Treatment terms | |
| "antiviral therapy": ["antiviral treatment", "nucleos(t)ide analogue", "na therapy"], | |
| "entecavir": ["etv", "baraclude"], | |
| "tenofovir": ["tdf", "taf", "viread", "vemlidy"], | |
| "interferon": ["ifn", "pegylated interferon", "peg-ifn"], | |
| # Clinical outcomes | |
| "treatment response": ["virological response", "biochemical response"], | |
| "viral suppression": ["undetectable hbv dna", "hbv dna < lloq"], | |
| "alt normalization": ["alt normal", "alt within normal limits"], | |
| # Complications | |
| "hepatocellular carcinoma": ["hcc", "liver cancer", "primary liver cancer"], | |
| "liver failure": ["hepatic failure", "end-stage liver disease", "esld"], | |
| "portal hypertension": ["esophageal varices", "ascites", "splenomegaly"], | |
| # Special populations | |
| "pregnant women": ["pregnancy", "pregnant patients"], | |
| "immunosuppressed": ["immunocompromised", "on immunosuppression"], | |
| "coinfection": ["co-infection", "dual infection"], | |
| } | |
# Regional spelling variations (US/UK/International).
# Each key is the spelling treated as canonical; its list holds the variant
# spellings that normalize to that key.
SPELLING_VARIATIONS = {
    # Spelled identically in every region (the variant equals the key).
    "fibrosis": [
        "fibrosis",
    ],
    "cirrhosis": [
        "cirrhosis",
    ],
    # UK spelling as the key, US spelling as the variant.
    "anaemia": [
        "anemia",
    ],
    "haemorrhage": [
        "hemorrhage",
    ],
    "oesophageal": [
        "esophageal",
    ],
}
# Context-specific term preferences.
# Maps a context hint to the terms associated with that context, in the order
# they should be considered.
CONTEXT_PREFERENCES = {
    "treatment": [
        "antiviral",
        "therapy",
        "regimen",
        "medication",
    ],
    "diagnosis": [
        "hbsag",
        "hbeag",
        "hbv dna",
        "serology",
    ],
    "monitoring": [
        "alt",
        "hbv dna",
        "liver function",
        "fibrosis",
    ],
    "complications": [
        "hcc",
        "cirrhosis",
        "decompensation",
        "liver failure",
    ],
}
| # ============================================================================ | |
| # DYNAMIC TERMINOLOGY LEARNING | |
| # ============================================================================ | |
class MedicalTerminologyExpander:
    """
    Expand and normalize HBV medical terminology.

    Each instance works on its own copies of the module-level abbreviation,
    synonym and spelling tables. Corpus-based learning is a placeholder at
    present: ``_learn_from_corpus`` only logs the request and adds no terms.
    """

    def __init__(self, corpus_path: Optional[Path] = None):
        """
        Initialize with optional corpus for dynamic learning.

        Args:
            corpus_path: Optional path to a corpus. It is handed to
                ``_learn_from_corpus`` only when it is given and exists.
        """
        # Copy the value lists too: a plain dict.copy() would share them with
        # the module-level tables, so editing one instance would edit them all.
        self.abbreviations = self._copy_table(MEDICAL_ABBREVIATIONS)
        self.synonyms = self._copy_table(MEDICAL_SYNONYMS)
        self.spelling_vars = self._copy_table(SPELLING_VARIATIONS)
        self.learned_terms = defaultdict(set)
        if corpus_path:
            corpus_path = Path(corpus_path)  # also accept a plain string path
            if corpus_path.exists():
                self._learn_from_corpus(corpus_path)

    @staticmethod
    def _copy_table(table: Dict[str, List[str]]) -> Dict[str, List[str]]:
        """Return a copy of *table* whose value lists are copied as well."""
        return {key: list(values) for key, values in table.items()}

    @staticmethod
    def _whole_term_pattern(term: str):
        """Compile a pattern that matches *term* only as a complete token."""
        # Lookarounds rather than \b: terms may end in punctuation ("hbsag+"),
        # where \b would fail to match before a following space.
        return re.compile(r"(?<!\w)" + re.escape(term) + r"(?!\w)")

    def expand_query(self, query: str, context: Optional[str] = None) -> List[str]:
        """
        Expand a query with medical synonyms and abbreviations.

        Terms are matched as whole tokens, so short abbreviations such as
        "pt" or "alt" are not expanded inside unrelated words ("symptoms",
        "alternative").

        Args:
            query: Original query string
            context: Optional context hint (e.g., 'treatment', 'diagnosis')

        Returns:
            List of expanded query variations. The original query comes first;
            abbreviation/synonym expansions are lower-cased; duplicates are
            removed while preserving order.
        """
        expansions = [query]
        query_lower = query.lower()

        # Abbreviations first, then synonyms; both are expanded the same way.
        for table in (self.abbreviations, self.synonyms):
            for term, alternatives in table.items():
                # Cheap substring pre-filter before building a regex.
                if not term or term not in query_lower:
                    continue
                pattern = self._whole_term_pattern(term)
                if not pattern.search(query_lower):
                    continue
                for alternative in alternatives:
                    # A callable replacement keeps backslashes in the
                    # replacement text literal.
                    expansions.append(
                        pattern.sub(lambda _match, text=alternative: text, query_lower)
                    )

        # Add context-specific preferences that the query does not mention yet
        if context and context in CONTEXT_PREFERENCES:
            for pref_term in CONTEXT_PREFERENCES[context]:
                if not self._whole_term_pattern(pref_term).search(query_lower):
                    expansions.append(f"{query} {pref_term}")

        # Remove duplicates while preserving order
        return list(dict.fromkeys(expansions))

    def normalize_term(self, term: str) -> str:
        """
        Normalize a medical term to its canonical form.

        Lookup order: abbreviation table (first expansion), synonym table,
        then spelling variations. Matching is case-insensitive and ignores
        surrounding whitespace.

        Args:
            term: Medical term to normalize

        Returns:
            Normalized canonical form, or ``term`` unchanged when no table
            knows it.
        """
        term_lower = term.lower().strip()

        # Check if it's an abbreviation
        expansions = self.abbreviations.get(term_lower)
        if expansions:
            return expansions[0]

        # Check if it's a synonym (or already a canonical term)
        for canonical, synonyms in self.synonyms.items():
            if term_lower == canonical or term_lower in synonyms:
                return canonical

        # Check spelling variations
        for canonical, variations in self.spelling_vars.items():
            if term_lower in variations:
                return canonical

        return term

    def _learn_from_corpus(self, corpus_path: Path):
        """
        Learn new terminology patterns from corpus.

        Placeholder: currently only logs the request; no terms are extracted.
        Failures are logged as warnings rather than raised.
        """
        try:
            logger.info("Learning terminology from corpus: %s", corpus_path)
            # TODO: analyze the corpus and extract new term relationships.
        except Exception as e:  # best effort: learning must never break startup
            logger.warning("Could not learn from corpus: %s", e)

    def get_related_terms(self, term: str, max_terms: int = 5) -> List[str]:
        """
        Get related medical terms for a given term.

        The result order is deterministic: for each matching synonym group the
        canonical term comes first, then its synonyms in table order, followed
        by any abbreviation expansions of ``term``.

        Args:
            term: Medical term
            max_terms: Maximum number of related terms to return (negative
                values are treated as 0)

        Returns:
            List of related terms, excluding ``term`` itself
        """
        term_lower = term.lower().strip()
        # A dict serves as an insertion-ordered set; slicing a real set would
        # return an arbitrary, run-dependent subset.
        related = {}

        # Find synonyms
        for canonical, synonyms in self.synonyms.items():
            if term_lower == canonical or term_lower in synonyms:
                related[canonical] = None
                related.update(dict.fromkeys(synonyms))

        # Find abbreviations
        related.update(dict.fromkeys(self.abbreviations.get(term_lower, ())))

        # Remove the original term
        related.pop(term_lower, None)
        return list(related)[: max(max_terms, 0)]
# Module-wide expander, built lazily on first request.
_global_expander = None


def get_terminology_expander() -> MedicalTerminologyExpander:
    """Return the shared terminology expander, creating it on first use."""
    global _global_expander
    if _global_expander is not None:
        return _global_expander
    _global_expander = MedicalTerminologyExpander()
    return _global_expander