Spaces:
Running
Running
| """ | |
| utils/api_clients.py | |
| ------------------------------------------------ | |
| Enhanced API clients for: | |
| - PubMed (NCBI) | |
| - ClinicalTrials.gov | |
| - FDA Open Data | |
| - WHO ICTRP | |
| ------------------------------------------------ | |
| Optimized for hybrid VAN-based query processing: | |
| - Automatically truncates long queries (top keywords only) | |
| - Resilient to API downtime or malformed responses | |
| - HTML formatted results for Gradio rendering | |
| """ | |
| import requests | |
| import html | |
| import re | |
| import traceback | |
| # ============================================================ | |
| # 🔹 Query Normalization | |
| # ============================================================ | |
| def _normalize_query(query: str, max_words: int = 5) -> str: | |
| """ | |
| Cleans and shortens user query for API compatibility. | |
| Removes filler phrases and limits to key words. | |
| """ | |
| q = query.lower() | |
| q = re.sub( | |
| r"(what is|define|explain|describe|in clinical trials|the meaning of|tell me about|explanation of|concept of)\b", | |
| "", | |
| q, | |
| ) | |
| q = re.sub(r"[^a-z0-9\s]", "", q) | |
| q = re.sub(r"\s+", " ", q).strip() | |
| # limit to first few words (avoid 404s from overlong queries) | |
| words = q.split() | |
| q = " ".join(words[:max_words]) | |
| return q or "clinical trial" | |
| # ============================================================ | |
| # 🔹 PubMed API (NCBI E-Utilities) | |
| # ============================================================ | |
def fetch_pubmed(query: str, limit: int = 3) -> str:
    """
    Search PubMed through the NCBI E-utilities and format the hits as HTML.

    The query is first reduced with ``_normalize_query``. ``esearch`` returns
    up to ``limit`` PubMed IDs, and a single ``esummary`` request then fetches
    title, source and publication date for all of them.

    Args:
        query: Free-text user query.
        limit: Maximum number of articles requested from ``esearch``.

    Returns:
        An HTML fragment with one entry per article (joined by ``<br><br>``),
        an italic "no results" notice, or an italic error notice. Exceptions
        are not propagated: the traceback is printed and the error text is
        returned inside the HTML instead.
    """
    try:
        q = _normalize_query(query)
        base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

        # Let requests build and encode the query string.
        res = requests.get(
            f"{base}esearch.fcgi",
            params={"db": "pubmed", "term": q, "retmax": limit, "retmode": "json"},
            timeout=10,
        )
        res.raise_for_status()
        ids = res.json().get("esearchresult", {}).get("idlist", [])
        if not ids:
            return f"<i>No PubMed results found for <b>{html.escape(q)}</b>.</i>"

        # esummary accepts a comma-separated id list, so one round trip
        # replaces one request per article.
        sres = requests.get(
            f"{base}esummary.fcgi",
            params={"db": "pubmed", "id": ",".join(ids), "retmode": "json"},
            timeout=10,
        )
        sres.raise_for_status()
        result = sres.json().get("result", {})

        summaries = []
        for pmid in ids:
            doc = result.get(pmid) or {}
            title = html.escape(str(doc.get("title") or "Untitled"))
            source = html.escape(str(doc.get("source") or ""))
            pubdate = html.escape(str(doc.get("pubdate") or ""))
            link = f"https://pubmed.ncbi.nlm.nih.gov/{html.escape(str(pmid))}/"
            summaries.append(
                f"<b>{title}</b><br>{source} ({pubdate})<br>"
                f"<a href='{link}' target='_blank'>[PubMed]</a>"
            )
        return "<br><br>".join(summaries)
    except Exception as e:
        traceback.print_exc()
        # Exception text may contain URLs or server-supplied text; escape it
        # like any other untrusted value before it reaches the HTML output.
        return (
            f"<i>PubMed fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )
| # ============================================================ | |
| # 🔹 ClinicalTrials.gov API | |
| # ============================================================ | |
def fetch_clinicaltrials(query: str, limit: int = 3) -> str:
    """
    Retrieve brief summaries of matching trials from ClinicalTrials.gov.

    Uses the ClinicalTrials.gov API v2 (``/api/v2/studies``); the classic
    ``/api/query/study_fields`` endpoint has been retired. The query is
    truncated by ``_normalize_query`` before being sent.

    Args:
        query: Free-text user query.
        limit: Maximum number of studies requested (sent as ``pageSize``).

    Returns:
        An HTML fragment with title, condition(s), status and a link for each
        study (joined by ``<br><br>``), an italic "no trials" notice, or an
        italic error notice. Exceptions are not propagated: the traceback is
        printed and the error text is returned inside the HTML instead.
    """
    try:
        q = _normalize_query(query)
        res = requests.get(
            "https://clinicaltrials.gov/api/v2/studies",
            params={
                "query.term": q,
                # Restrict the payload to the four fields rendered below.
                "fields": "NCTId,BriefTitle,Condition,OverallStatus",
                "pageSize": limit,
            },
            timeout=10,
        )
        res.raise_for_status()
        studies = res.json().get("studies", [])
        if not studies:
            return f"<i>No trials found for <b>{html.escape(q)}</b>.</i>"

        formatted = []
        for s in studies:
            # v2 nests each field inside protocolSection.<module>; any level
            # may be absent, so fall back to empty containers throughout.
            protocol = s.get("protocolSection") or {}
            ident = protocol.get("identificationModule") or {}
            conditions = (protocol.get("conditionsModule") or {}).get("conditions") or []
            raw_status = (protocol.get("statusModule") or {}).get("overallStatus") or "Unknown"

            nct = html.escape(str(ident.get("nctId") or ""))
            title = html.escape(str(ident.get("briefTitle") or ""))
            condition = html.escape(", ".join(str(c) for c in conditions))
            # v2 reports status as an enum constant (e.g. NOT_YET_RECRUITING);
            # render it in a readable form.
            status = html.escape(str(raw_status).replace("_", " ").title())
            link = f"https://clinicaltrials.gov/study/{nct}" if nct else "#"
            formatted.append(
                f"<b>{title}</b><br>"
                f"Condition: {condition or 'N/A'}<br>"
                f"Status: {status}<br>"
                f"<a href='{link}' target='_blank'>[ClinicalTrials.gov]</a>"
            )
        return "<br><br>".join(formatted)
    except Exception as e:
        traceback.print_exc()
        # Exception text may contain URLs or server-supplied text; escape it
        # like any other untrusted value before it reaches the HTML output.
        return (
            f"<i>ClinicalTrials.gov fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )
| # ============================================================ | |
| # 🔹 FDA Open Data API | |
| # ============================================================ | |
def fetch_fda(query: str, limit: int = 3) -> str:
    """
    Retrieve FDA label and safety data for a given compound/drug name.

    Searches the openFDA drug-label endpoint on ``openfda.brand_name`` using
    the query as reduced by ``_normalize_query``.

    Args:
        query: Free-text user query (expected to name a drug).
        limit: Maximum number of label records requested.

    Returns:
        An HTML fragment listing brand name, generic name, the first
        ``purpose`` entry and the first ``warnings`` entry of each record
        (joined by ``<br><br>``), an italic "no data" notice (also used for
        HTTP 404), or an italic error notice. Exceptions are not propagated:
        the traceback is printed and the error text is returned inside the
        HTML instead.
    """
    try:
        q = _normalize_query(query)
        url = f"https://api.fda.gov/drug/label.json?search=openfda.brand_name:{q}&limit={limit}"
        res = requests.get(url, timeout=10)
        # A 404 status is reported as "no data" rather than as a failure.
        if res.status_code == 404:
            return f"<i>No FDA data found for <b>{html.escape(q)}</b>.</i>"
        res.raise_for_status()
        data = res.json().get("results", [])
        if not data:
            return f"<i>No FDA label results found for <b>{html.escape(q)}</b>.</i>"

        link = "https://open.fda.gov/drug/label/"
        formatted = []
        for entry in data:
            openfda = entry.get("openfda") or {}
            # Brand and generic names come from the API response and must be
            # escaped like every other field placed in the HTML.
            brand = html.escape(", ".join(openfda.get("brand_name") or []))
            generic = html.escape(", ".join(openfda.get("generic_name") or []))
            purpose = html.escape(" ".join((entry.get("purpose") or [])[:1]))
            warnings = html.escape(" ".join((entry.get("warnings") or [])[:1]))
            formatted.append(
                f"<b>{brand or html.escape(q)}</b> ({generic or 'N/A'})<br>"
                f"<u>Purpose:</u> {purpose or 'N/A'}<br>"
                f"<u>Warning:</u> {warnings or 'N/A'}<br>"
                f"<a href='{link}' target='_blank'>[FDA Label]</a>"
            )
        return "<br><br>".join(formatted)
    except Exception as e:
        traceback.print_exc()
        # Exception text may contain URLs or server-supplied text; escape it
        # like any other untrusted value before it reaches the HTML output.
        return (
            f"<i>FDA fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )
| # ============================================================ | |
| # 🔹 WHO ICTRP (Backup Trial Source) | |
| # ============================================================ | |
def fetch_who_trials(query: str, limit: int = 2) -> str:
    """
    Optional backup trial search against the WHO ICTRP endpoint.

    Sends the query (reduced by ``_normalize_query``) and renders at most
    ``limit`` entries from the ``TrialSearchResult`` list of the response.

    Args:
        query: Free-text user query.
        limit: Maximum number of trials rendered; values below 1 render none.

    Returns:
        An HTML fragment with title, registry and recruitment country for
        each trial (joined by ``<br><br>``), an italic "unavailable" notice
        for any non-200 status, an italic "no trials" notice, or an italic
        error notice. Exceptions are not propagated: the traceback is printed
        and the error text is returned inside the HTML instead.
    """
    try:
        q = _normalize_query(query)
        url = f"https://trialsearch.who.int/api/TrialSearch?query={q}"
        res = requests.get(url, timeout=10)
        if res.status_code != 200:
            return "<i>WHO ICTRP API unavailable or throttled.</i>"
        trials = res.json().get("TrialSearchResult", [])
        if not trials:
            return f"<i>No WHO trials found for <b>{html.escape(q)}</b>.</i>"

        formatted = []
        # Clamp at 0 so a negative limit cannot slice from the end of the list.
        for t in trials[: max(limit, 0)]:
            # `or` also covers keys that are present with a None value, which
            # would otherwise make html.escape raise and discard all results.
            title = html.escape(str(t.get("Scientific_title") or "Untitled"))
            registry = html.escape(str(t.get("Register") or ""))
            country = html.escape(str(t.get("Recruitment_Country") or ""))
            formatted.append(
                f"<b>{title}</b><br>{registry or 'Registry Unknown'} — {country or 'N/A'}"
            )
        return "<br><br>".join(formatted)
    except Exception as e:
        traceback.print_exc()
        # Exception text may contain URLs or server-supplied text; escape it
        # like any other untrusted value before it reaches the HTML output.
        return (
            f"<i>WHO ICTRP fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )