""" utils/api_clients.py ------------------------------------------------ Enhanced API clients for: - PubMed (NCBI) - ClinicalTrials.gov - FDA Open Data - WHO ICTRP ------------------------------------------------ Optimized for hybrid VAN-based query processing: - Automatically truncates long queries (top keywords only) - Resilient to API downtime or malformed responses - HTML formatted results for Gradio rendering """ import requests import html import re import traceback # ============================================================ # 🔹 Query Normalization # ============================================================ def _normalize_query(query: str, max_words: int = 5) -> str: """ Cleans and shortens user query for API compatibility. Removes filler phrases and limits to key words. """ q = query.lower() q = re.sub( r"(what is|define|explain|describe|in clinical trials|the meaning of|tell me about|explanation of|concept of)\b", "", q, ) q = re.sub(r"[^a-z0-9\s]", "", q) q = re.sub(r"\s+", " ", q).strip() # limit to first few words (avoid 404s from overlong queries) words = q.split() q = " ".join(words[:max_words]) return q or "clinical trial" # ============================================================ # 🔹 PubMed API (NCBI E-Utilities) # ============================================================ def fetch_pubmed(query: str, limit: int = 3) -> str: try: q = _normalize_query(query) base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/" esearch = f"{base}esearch.fcgi?db=pubmed&term={q}&retmax={limit}&retmode=json" res = requests.get(esearch, timeout=10) res.raise_for_status() ids = res.json().get("esearchresult", {}).get("idlist", []) if not ids: return f"No PubMed results found for {html.escape(q)}." summaries = [] for pmid in ids: summary_url = f"{base}esummary.fcgi?db=pubmed&id={pmid}&retmode=json" sres = requests.get(summary_url, timeout=10) sres.raise_for_status() doc = sres.json()["result"].get(pmid, {}) title = html.escape(doc.get("title", "Untitled")) source = html.escape(doc.get("source", "")) pubdate = html.escape(doc.get("pubdate", "")) link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/" summaries.append( f"{title}
{source} ({pubdate})
" f"[PubMed]" ) return "

".join(summaries) except Exception as e: traceback.print_exc() return f"PubMed fetch failed for {html.escape(query)}: {e}" # ============================================================ # 🔹 ClinicalTrials.gov API # ============================================================ def fetch_clinicaltrials(query: str, limit: int = 3) -> str: """ Retrieves brief summaries of matching trials from ClinicalTrials.gov. Automatically truncates query to avoid 404s on long input. """ try: q = _normalize_query(query) url = ( f"https://clinicaltrials.gov/api/query/study_fields?" f"expr={q}&fields=NCTId,BriefTitle,Condition,OverallStatus" f"&max_rnk={limit}&fmt=json" ) res = requests.get(url, timeout=10) res.raise_for_status() studies = res.json().get("StudyFieldsResponse", {}).get("StudyFields", []) if not studies: return f"No trials found for {html.escape(q)}." formatted = [] for s in studies: nct = s.get("NCTId", [""])[0] title = html.escape(s.get("BriefTitle", [""])[0]) condition = html.escape(", ".join(s.get("Condition", []))) status = html.escape(s.get("OverallStatus", ["Unknown"])[0]) link = f"https://clinicaltrials.gov/study/{nct}" if nct else "#" formatted.append( f"{title}
" f"Condition: {condition or 'N/A'}
" f"Status: {status}
" f"[ClinicalTrials.gov]" ) return "

".join(formatted) except Exception as e: traceback.print_exc() return f"ClinicalTrials.gov fetch failed for {html.escape(query)}: {e}" # ============================================================ # 🔹 FDA Open Data API # ============================================================ def fetch_fda(query: str, limit: int = 3) -> str: """ Retrieves FDA label and safety data for a given compound/drug name. """ try: q = _normalize_query(query) url = f"https://api.fda.gov/drug/label.json?search=openfda.brand_name:{q}&limit={limit}" res = requests.get(url, timeout=10) if res.status_code == 404: return f"No FDA data found for {html.escape(q)}." res.raise_for_status() data = res.json().get("results", []) if not data: return f"No FDA label results found for {html.escape(q)}." formatted = [] for entry in data: brand = ", ".join(entry.get("openfda", {}).get("brand_name", [])) generic = ", ".join(entry.get("openfda", {}).get("generic_name", [])) purpose = html.escape(" ".join(entry.get("purpose", [])[:1])) warnings = html.escape(" ".join(entry.get("warnings", [])[:1])) link = "https://open.fda.gov/drug/label/" formatted.append( f"{brand or q} ({generic or 'N/A'})
" f"Purpose: {purpose or 'N/A'}
" f"Warning: {warnings or 'N/A'}
" f"[FDA Label]" ) return "

".join(formatted) except Exception as e: traceback.print_exc() return f"FDA fetch failed for {html.escape(query)}: {e}" # ============================================================ # 🔹 WHO ICTRP (Backup Trial Source) # ============================================================ def fetch_who_trials(query: str, limit: int = 2) -> str: """ Optional backup trial search from WHO ICTRP API. Returns simplified summaries for readability. """ try: q = _normalize_query(query) url = f"https://trialsearch.who.int/api/TrialSearch?query={q}" res = requests.get(url, timeout=10) if res.status_code != 200: return "WHO ICTRP API unavailable or throttled." trials = res.json().get("TrialSearchResult", []) if not trials: return f"No WHO trials found for {html.escape(q)}." formatted = [] for t in trials[:limit]: title = html.escape(t.get("Scientific_title", "Untitled")) registry = html.escape(t.get("Register", "")) country = html.escape(t.get("Recruitment_Country", "")) formatted.append( f"{title}
{registry or 'Registry Unknown'} — {country or 'N/A'}" ) return "

".join(formatted) except Exception as e: traceback.print_exc() return f"WHO ICTRP fetch failed for {html.escape(query)}: {e}"