Spaces:
Sleeping
Sleeping
File size: 7,480 Bytes
9788b7f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
"""
utils/api_clients.py
------------------------------------------------
Enhanced API clients for:
- PubMed (NCBI)
- ClinicalTrials.gov
- FDA Open Data
- WHO ICTRP
------------------------------------------------
Optimized for hybrid VAN-based query processing:
- Automatically truncates long queries (top keywords only)
- Resilient to API downtime or malformed responses
- HTML formatted results for Gradio rendering
"""
import requests
import html
import re
import traceback
# ============================================================
# 🔹 Query Normalization
# ============================================================
def _normalize_query(query: str, max_words: int = 5) -> str:
"""
Cleans and shortens user query for API compatibility.
Removes filler phrases and limits to key words.
"""
q = query.lower()
q = re.sub(
r"(what is|define|explain|describe|in clinical trials|the meaning of|tell me about|explanation of|concept of)\b",
"",
q,
)
q = re.sub(r"[^a-z0-9\s]", "", q)
q = re.sub(r"\s+", " ", q).strip()
# limit to first few words (avoid 404s from overlong queries)
words = q.split()
q = " ".join(words[:max_words])
return q or "clinical trial"
# ============================================================
# 🔹 PubMed API (NCBI E-Utilities)
# ============================================================
def fetch_pubmed(query: str, limit: int = 3) -> str:
    """Search PubMed through NCBI E-utilities and format the hits as HTML.

    Args:
        query: Free-text user query; shortened with ``_normalize_query``.
        limit: Maximum number of articles requested from esearch.

    Returns:
        An HTML snippet: one entry per article (title, source, publication
        date, PubMed link) joined by ``<br><br>``, or an italic message when
        nothing is found or the request fails. Never raises.
    """
    try:
        q = _normalize_query(query)
        base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        # ``params`` lets requests URL-encode the term instead of pasting it
        # raw into the query string.
        res = requests.get(
            f"{base}esearch.fcgi",
            params={"db": "pubmed", "term": q, "retmax": limit, "retmode": "json"},
            timeout=10,
        )
        res.raise_for_status()
        ids = res.json().get("esearchresult", {}).get("idlist", [])
        if not ids:
            return f"<i>No PubMed results found for <b>{html.escape(q)}</b>.</i>"

        # esummary accepts a comma-separated id list, so a single request
        # replaces the previous one-request-per-article loop.
        sres = requests.get(
            f"{base}esummary.fcgi",
            params={"db": "pubmed", "id": ",".join(ids), "retmode": "json"},
            timeout=10,
        )
        sres.raise_for_status()
        docs = sres.json().get("result") or {}

        summaries = []
        for pmid in ids:
            doc = docs.get(pmid) or {}
            # ``or`` guards against explicit nulls, which html.escape rejects.
            title = html.escape(str(doc.get("title") or "Untitled"))
            source = html.escape(str(doc.get("source") or ""))
            pubdate = html.escape(str(doc.get("pubdate") or ""))
            link = f"https://pubmed.ncbi.nlm.nih.gov/{html.escape(str(pmid))}/"
            summaries.append(
                f"<b>{title}</b><br>{source} ({pubdate})<br>"
                f"<a href='{link}' target='_blank'>[PubMed]</a>"
            )
        return "<br><br>".join(summaries)
    except Exception as e:
        # Boundary handler: the caller renders the return value as HTML, so
        # the failure is reported in-band and the exception text is escaped.
        traceback.print_exc()
        return (
            f"<i>PubMed fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )
# ============================================================
# 🔹 ClinicalTrials.gov API
# ============================================================
def fetch_clinicaltrials(query: str, limit: int = 3) -> str:
    """Retrieve brief summaries of matching trials from ClinicalTrials.gov.

    Uses the ClinicalTrials.gov API v2 (``/api/v2/studies``); the classic
    ``/api/query/study_fields`` endpoint previously used here has been
    retired. The query is truncated with ``_normalize_query`` first.

    Args:
        query: Free-text user query.
        limit: Maximum number of studies to request (sent as ``pageSize``).

    Returns:
        An HTML snippet: one entry per study (title, conditions, status,
        link) joined by ``<br><br>``, or an italic message when nothing is
        found or the request fails. Never raises.
    """
    try:
        q = _normalize_query(query)
        res = requests.get(
            "https://clinicaltrials.gov/api/v2/studies",
            params={
                "query.term": q,
                "fields": "NCTId,BriefTitle,Condition,OverallStatus",
                "pageSize": limit,
                "format": "json",
            },
            timeout=10,
        )
        res.raise_for_status()
        studies = res.json().get("studies") or []
        if not studies:
            return f"<i>No trials found for <b>{html.escape(q)}</b>.</i>"

        formatted = []
        for study in studies:
            # v2 nests the requested fields under protocolSection modules.
            protocol = study.get("protocolSection") or {}
            ident = protocol.get("identificationModule") or {}
            conditions = (protocol.get("conditionsModule") or {}).get("conditions") or []
            raw_status = (protocol.get("statusModule") or {}).get("overallStatus") or "Unknown"

            nct = html.escape(str(ident.get("nctId") or ""))
            title = html.escape(str(ident.get("briefTitle") or "Untitled"))
            condition = html.escape(", ".join(str(c) for c in conditions))
            # v2 reports statuses as enum names such as NOT_YET_RECRUITING.
            status = html.escape(str(raw_status).replace("_", " ").title())
            link = f"https://clinicaltrials.gov/study/{nct}" if nct else "#"
            formatted.append(
                f"<b>{title}</b><br>"
                f"Condition: {condition or 'N/A'}<br>"
                f"Status: {status}<br>"
                f"<a href='{link}' target='_blank'>[ClinicalTrials.gov]</a>"
            )
        return "<br><br>".join(formatted)
    except Exception as e:
        # Boundary handler: report the failure in-band as escaped HTML.
        traceback.print_exc()
        return (
            f"<i>ClinicalTrials.gov fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )
# ============================================================
# 🔹 FDA Open Data API
# ============================================================
def fetch_fda(query: str, limit: int = 3) -> str:
    """Retrieve FDA drug-label data for a compound/drug name from openFDA.

    Args:
        query: Free-text user query; shortened with ``_normalize_query`` and
            matched against ``openfda.brand_name``.
        limit: Maximum number of label records to request.

    Returns:
        An HTML snippet: one entry per label (brand, generic name, first
        purpose and warning paragraph, link) joined by ``<br><br>``, or an
        italic message when nothing is found or the request fails.
        Never raises.
    """
    try:
        q = _normalize_query(query)
        res = requests.get(
            "https://api.fda.gov/drug/label.json",
            # Passed via ``params`` so the search expression is URL-encoded.
            params={"search": f"openfda.brand_name:{q}", "limit": limit},
            timeout=10,
        )
        # A 404 is handled as "no data" before raise_for_status() so it is
        # not reported as a failure.
        if res.status_code == 404:
            return f"<i>No FDA data found for <b>{html.escape(q)}</b>.</i>"
        res.raise_for_status()
        data = res.json().get("results") or []
        if not data:
            return f"<i>No FDA label results found for <b>{html.escape(q)}</b>.</i>"

        link = "https://open.fda.gov/drug/label/"
        formatted = []
        for entry in data:
            openfda = entry.get("openfda") or {}
            # Every value taken from the response is escaped before it is
            # embedded in HTML, including brand and generic names.
            brand = html.escape(", ".join(openfda.get("brand_name") or []))
            generic = html.escape(", ".join(openfda.get("generic_name") or []))
            purpose = html.escape(" ".join((entry.get("purpose") or [])[:1]))
            warnings = html.escape(" ".join((entry.get("warnings") or [])[:1]))
            formatted.append(
                f"<b>{brand or html.escape(q)}</b> ({generic or 'N/A'})<br>"
                f"<u>Purpose:</u> {purpose or 'N/A'}<br>"
                f"<u>Warning:</u> {warnings or 'N/A'}<br>"
                f"<a href='{link}' target='_blank'>[FDA Label]</a>"
            )
        return "<br><br>".join(formatted)
    except Exception as e:
        # Boundary handler: report the failure in-band as escaped HTML.
        traceback.print_exc()
        return (
            f"<i>FDA fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )
# ============================================================
# 🔹 WHO ICTRP (Backup Trial Source)
# ============================================================
def fetch_who_trials(query: str, limit: int = 2) -> str:
    """Optional backup trial search against the WHO ICTRP search endpoint.

    NOTE(review): the endpoint and the response keys used below
    (``TrialSearchResult``, ``Scientific_title``, ``Register``,
    ``Recruitment_Country``) are taken as-is from the existing code and have
    not been verified against WHO documentation; confirm before relying on it.

    Args:
        query: Free-text user query; shortened with ``_normalize_query``.
        limit: Maximum number of trials to include in the output.

    Returns:
        An HTML snippet: one entry per trial (title, registry, country)
        joined by ``<br><br>``, or an italic message when the service is
        unavailable, nothing is found, or the request fails. Never raises.
    """
    try:
        q = _normalize_query(query)
        res = requests.get(
            "https://trialsearch.who.int/api/TrialSearch",
            params={"query": q},  # URL-encoded by requests
            timeout=10,
        )
        if res.status_code != 200:
            return "<i>WHO ICTRP API unavailable or throttled.</i>"
        trials = res.json().get("TrialSearchResult") or []
        if not trials:
            return f"<i>No WHO trials found for <b>{html.escape(q)}</b>.</i>"

        formatted = []
        # max() keeps a negative limit from slicing from the wrong end.
        for trial in trials[: max(limit, 0)]:
            # ``or`` guards against explicit nulls, which html.escape rejects
            # and which would otherwise discard every result.
            title = html.escape(str(trial.get("Scientific_title") or "Untitled"))
            registry = html.escape(str(trial.get("Register") or ""))
            country = html.escape(str(trial.get("Recruitment_Country") or ""))
            formatted.append(
                f"<b>{title}</b><br>{registry or 'Registry Unknown'} — {country or 'N/A'}"
            )
        return "<br><br>".join(formatted)
    except Exception as e:
        # Boundary handler: report the failure in-band as escaped HTML.
        traceback.print_exc()
        return (
            f"<i>WHO ICTRP fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )
|