"""
utils/api_clients.py
------------------------------------------------
Enhanced API clients for:
- PubMed (NCBI)
- ClinicalTrials.gov
- FDA Open Data
- WHO ICTRP
------------------------------------------------
Optimized for hybrid VAN-based query processing:
- Automatically truncates long queries (top keywords only)
- Resilient to API downtime or malformed responses
- HTML formatted results for Gradio rendering
"""
import requests
import html
import re
import traceback
# ============================================================
# 🔹 Query Normalization
# ============================================================
def _normalize_query(query: str, max_words: int = 5) -> str:
"""
Cleans and shortens user query for API compatibility.
Removes filler phrases and limits to key words.
"""
q = query.lower()
q = re.sub(
r"(what is|define|explain|describe|in clinical trials|the meaning of|tell me about|explanation of|concept of)\b",
"",
q,
)
q = re.sub(r"[^a-z0-9\s]", "", q)
q = re.sub(r"\s+", " ", q).strip()
# limit to first few words (avoid 404s from overlong queries)
words = q.split()
q = " ".join(words[:max_words])
return q or "clinical trial"
# ============================================================
# 🔹 PubMed API (NCBI E-Utilities)
# ============================================================
def fetch_pubmed(query: str, limit: int = 3) -> str:
    """
    Search PubMed through the NCBI E-utilities and return HTML summaries.

    Runs ``esearch`` for the normalized query, then retrieves the summaries
    of all returned PMIDs with a single ``esummary`` request.

    Args:
        query: Free-text user query; shortened by ``_normalize_query``.
        limit: Maximum number of articles to request (sent as ``retmax``).

    Returns:
        An HTML fragment with one entry per article, a "no results" message,
        or a "fetch failed" message when any step raises. Exceptions are
        caught, printed via ``traceback`` and reported in the returned string.
    """
    try:
        q = _normalize_query(query)
        base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        # Let requests encode the query string instead of splicing it in.
        res = requests.get(
            f"{base}esearch.fcgi",
            params={"db": "pubmed", "term": q, "retmax": limit, "retmode": "json"},
            timeout=10,
        )
        res.raise_for_status()
        ids = res.json().get("esearchresult", {}).get("idlist", [])
        if not ids:
            return f"No PubMed results found for {html.escape(q)}."

        # esummary accepts a comma-separated id list, so one request replaces
        # the per-article round trips.
        sres = requests.get(
            f"{base}esummary.fcgi",
            params={"db": "pubmed", "id": ",".join(ids), "retmode": "json"},
            timeout=10,
        )
        sres.raise_for_status()
        docs = sres.json().get("result") or {}

        # NOTE(review): the <br>/<a> markup is reconstructed from the module's
        # stated HTML output; confirm it matches what the UI expects.
        summaries = []
        for pmid in ids:
            doc = docs.get(pmid) or {}
            title = html.escape(str(doc.get("title") or "Untitled"))
            source = html.escape(str(doc.get("source") or ""))
            pubdate = html.escape(str(doc.get("pubdate") or ""))
            link = f"https://pubmed.ncbi.nlm.nih.gov/{html.escape(str(pmid))}/"
            summaries.append(
                f"{title}<br>{source} ({pubdate})<br>"
                f'<a href="{link}" target="_blank">[PubMed]</a>'
            )
        return "<br><br>".join(summaries)
    except Exception as e:
        traceback.print_exc()
        # Escape both parts: the message ends up in HTML output, and str()
        # keeps the handler itself from raising on non-string input.
        return (
            f"PubMed fetch failed for {html.escape(str(query))}: "
            f"{html.escape(str(e))}"
        )
# ============================================================
# 🔹 ClinicalTrials.gov API
# ============================================================
def fetch_clinicaltrials(query: str, limit: int = 3) -> str:
    """
    Retrieve brief summaries of matching trials from ClinicalTrials.gov.

    The query is shortened by ``_normalize_query`` before it is sent, to
    avoid 404s on long input.

    Args:
        query: Free-text user query.
        limit: Maximum number of trials to request (sent as ``max_rnk``).

    Returns:
        An HTML fragment with one entry per trial, a "no trials" message, or
        a "fetch failed" message when any step raises. Exceptions are caught,
        printed via ``traceback`` and reported in the returned string.
    """
    try:
        q = _normalize_query(query)
        # NOTE(review): this classic "study_fields" endpoint is believed to
        # have been retired in favour of API v2 (/api/v2/studies), which uses
        # a different response schema. Confirm and migrate if so.
        res = requests.get(
            "https://clinicaltrials.gov/api/query/study_fields",
            params={
                "expr": q,
                "fields": "NCTId,BriefTitle,Condition,OverallStatus",
                "max_rnk": limit,
                "fmt": "json",
            },
            timeout=10,
        )
        res.raise_for_status()
        studies = res.json().get("StudyFieldsResponse", {}).get("StudyFields", [])
        if not studies:
            return f"No trials found for {html.escape(q)}."

        # NOTE(review): the <br>/<a> markup is reconstructed from the module's
        # stated HTML output; confirm it matches what the UI expects.
        formatted = []
        for s in studies:
            # Each field is read as a list; "or [...]" covers a field that is
            # present but empty, which would otherwise raise IndexError and
            # fail the whole fetch.
            nct = html.escape(str((s.get("NCTId") or [""])[0]))
            title = html.escape(str((s.get("BriefTitle") or [""])[0]))
            condition = html.escape(", ".join(s.get("Condition") or []))
            status = html.escape(str((s.get("OverallStatus") or ["Unknown"])[0]))
            link = f"https://clinicaltrials.gov/study/{nct}" if nct else "#"
            formatted.append(
                f"{title}<br>"
                f"Condition: {condition or 'N/A'}<br>"
                f"Status: {status}<br>"
                f'<a href="{link}" target="_blank">[ClinicalTrials.gov]</a>'
            )
        return "<br><br>".join(formatted)
    except Exception as e:
        traceback.print_exc()
        # Escape both parts: the message ends up in HTML output, and str()
        # keeps the handler itself from raising on non-string input.
        return (
            f"ClinicalTrials.gov fetch failed for {html.escape(str(query))}: "
            f"{html.escape(str(e))}"
        )
# ============================================================
# 🔹 FDA Open Data API
# ============================================================
def fetch_fda(query: str, limit: int = 3) -> str:
    """
    Retrieve FDA label and safety data for a given compound/drug name.

    Searches the openFDA drug-label endpoint on ``openfda.brand_name`` using
    the query shortened by ``_normalize_query``.

    Args:
        query: Free-text user query (expected to name a drug).
        limit: Maximum number of label records to request.

    Returns:
        An HTML fragment with one entry per label, a "no data" message (the
        endpoint's 404 is handled as "no match"), or a "fetch failed" message
        when any step raises. Exceptions are caught, printed via
        ``traceback`` and reported in the returned string.
    """
    try:
        q = _normalize_query(query)
        res = requests.get(
            "https://api.fda.gov/drug/label.json",
            params={"search": f"openfda.brand_name:{q}", "limit": limit},
            timeout=10,
        )
        # Checked before raise_for_status so a 404 yields the "no data"
        # message instead of the failure message.
        if res.status_code == 404:
            return f"No FDA data found for {html.escape(q)}."
        res.raise_for_status()
        data = res.json().get("results", [])
        if not data:
            return f"No FDA label results found for {html.escape(q)}."

        # NOTE(review): the <br>/<a> markup is reconstructed from the module's
        # stated HTML output; confirm it matches what the UI expects.
        link = "https://open.fda.gov/drug/label/"
        formatted = []
        for entry in data:
            openfda = entry.get("openfda") or {}
            # Brand and generic names come from the API response and are
            # escaped like every other field.
            brand = html.escape(", ".join(openfda.get("brand_name") or []))
            generic = html.escape(", ".join(openfda.get("generic_name") or []))
            purpose = html.escape(" ".join((entry.get("purpose") or [])[:1]))
            warnings = html.escape(" ".join((entry.get("warnings") or [])[:1]))
            formatted.append(
                f"{brand or html.escape(q)} ({generic or 'N/A'})<br>"
                f"Purpose: {purpose or 'N/A'}<br>"
                f"Warning: {warnings or 'N/A'}<br>"
                f'<a href="{link}" target="_blank">[FDA Label]</a>'
            )
        return "<br><br>".join(formatted)
    except Exception as e:
        traceback.print_exc()
        # Escape both parts: the message ends up in HTML output, and str()
        # keeps the handler itself from raising on non-string input.
        return (
            f"FDA fetch failed for {html.escape(str(query))}: "
            f"{html.escape(str(e))}"
        )
# ============================================================
# 🔹 WHO ICTRP (Backup Trial Source)
# ============================================================
def fetch_who_trials(query: str, limit: int = 2) -> str:
    """
    Optional backup trial search from the WHO ICTRP API.

    Returns simplified summaries for readability.

    Args:
        query: Free-text user query; shortened by ``_normalize_query``.
        limit: Maximum number of trials to include; applied locally by
            slicing the response.

    Returns:
        An HTML fragment with one entry per trial, an "unavailable" message
        for any non-200 status, a "no trials" message, or a "fetch failed"
        message when any step raises. Exceptions are caught, printed via
        ``traceback`` and reported in the returned string.
    """
    try:
        q = _normalize_query(query)
        # NOTE(review): endpoint and response keys are kept as found; they
        # could not be checked against published WHO documentation here.
        res = requests.get(
            "https://trialsearch.who.int/api/TrialSearch",
            params={"query": q},
            timeout=10,
        )
        if res.status_code != 200:
            return "WHO ICTRP API unavailable or throttled."
        trials = res.json().get("TrialSearchResult", [])
        if not trials:
            return f"No WHO trials found for {html.escape(q)}."

        # NOTE(review): the <br> markup is reconstructed from the module's
        # stated HTML output; confirm it matches what the UI expects.
        formatted = []
        for t in trials[:limit]:
            # "or" fallbacks cover keys that are present with a null value,
            # which html.escape would otherwise reject.
            title = html.escape(str(t.get("Scientific_title") or "Untitled"))
            registry = html.escape(str(t.get("Register") or ""))
            country = html.escape(str(t.get("Recruitment_Country") or ""))
            formatted.append(
                f"{title}<br>"
                f"{registry or 'Registry Unknown'} — {country or 'N/A'}"
            )
        return "<br><br>".join(formatted)
    except Exception as e:
        traceback.print_exc()
        # Escape both parts: the message ends up in HTML output, and str()
        # keeps the handler itself from raising on non-string input.
        return (
            f"WHO ICTRP fetch failed for {html.escape(str(query))}: "
            f"{html.escape(str(e))}"
        )