MedDataSearchAgent

Sleeping

App Files Files Community

SadiaK14 commited on 4 days ago

Commit

15c7a33

verified ·

1 Parent(s): d6d0dda

Update app.py

Browse files

Files changed (1) hide show

app.py +229 -119

app.py CHANGED Viewed

@@ -1,16 +1,109 @@
-from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel, load_tool, tool
-import datetime
-import requests
-import pytz
-import yaml
-from tools.final_answer import FinalAnswerTool
-from Gradio_UI import GradioUI
-# Custom Tool to fetch datasets related to body parts or imaging types
 # @tool
 # def my_custom_tool(arg1: str, arg2: int) -> str:
@@ -28,83 +121,130 @@ from Gradio_UI import GradioUI
 #         keyword = arg1.strip().lower()
 #         limit = int(arg2)
-#         # Define a basic list of medically relevant terms
 #         medical_terms = [
-#             # Anatomy / Body Parts
 #             "skin", "brain", "lung", "chest", "abdomen", "spine", "bone", "heart", "liver", "kidney",
-#             "bladder", "stomach", "colon", "rectum", "esophagus", "pancreas", "breast", "ear", "eye",
-#             "retina", "tooth", "teeth", "tongue", "jaw", "neck", "wrist", "hand", "leg", "arm", "shoulder", "pelvis",
-#             # Diseases / Conditions
-#             "cancer", "tumor", "stroke", "diabetes", "pneumonia", "covid", "asthma", "eczema", "melanoma",
-#             "hypertension", "alzheimer", "parkinson", "arthritis", "scoliosis", "epilepsy", "glaucoma",
-#             "ulcer", "hepatitis", "leukemia", "lymphoma", "tuberculosis", "anemia", "obesity", "depression",
-#             "anxiety", "bipolar", "autism", "adhd", "ptsd", "psychosis", "schizophrenia",
-#             # Imaging Modalities
-#             "mri", "ct", "xray", "x-ray", "ultrasound", "pet", "fmri", "mammo", "angiography", "radiography",
-#             "echocardiogram", "spect", "dermoscopy", "colonoscopy", "endoscopy", "biopsy", "histopathology",
-#             # Medical Specialties
 #             "radiology", "pathology", "oncology", "cardiology", "neurology", "dermatology", "dentistry",
 #             "ophthalmology", "urology", "orthopedics", "gastroenterology", "pulmonology", "nephrology",
 #             "psychiatry", "pediatrics", "geriatrics", "infectious disease",
-#             # Symptoms / Signs
-#             "lesion", "infection", "fever", "pain", "inflammation", "rash", "headache", "swelling",
-#             "cough", "seizure", "dizziness", "vomiting", "diarrhea", "nausea", "fatigue", "itching",
-#             # Common Specific Diseases
-#             "breast cancer", "prostate cancer", "lung cancer", "skin cancer", "colon cancer",
-#             "brain tumor", "liver cancer", "cervical cancer", "bladder cancer", "thyroid cancer",
-#             # Procedures / Interventions
-#             "surgery", "chemotherapy", "radiation", "transplant", "dialysis", "intubation", "stenting",
-#             "ventilation", "vaccination", "anesthesia", "rehabilitation", "prosthetics", "orthotics",
-#             # Lab Tests / Biomarkers
-#             "blood test", "cbc", "glucose", "hemoglobin", "cholesterol", "biomarker", "urinalysis",
-#             "pcr", "serology", "antibody", "antigen",
-#             # Clinical Settings / Roles
-#             "icu", "hospital", "emergency", "clinical notes", "nursing", "physician", "patient",
-#             "medical record", "electronic health record", "ehr", "vitals",
-#             # Age-based Terms
-#             "pediatric", "neonatal", "infant", "child", "adolescent", "geriatrics", "elderly",
-#             # Epidemiology / Public Health
-#             "epidemiology", "prevalence", "incidence", "mortality", "public health", "health disparity",
-#             "risk factor", "social determinant",
-#             # Pharmacology / Medications
-#             "drug", "medication", "pharmacology", "side effect", "adverse event", "dose", "tablet",
-#             "vaccine", "clinical trial", "placebo"
 #         ]
-#         # Check if keyword is in known medical terms
 #         if not any(term in keyword for term in medical_terms):
-#             return f"No medical datasets found for '{arg1}'."
-#         # Fetch datasets from Hugging Face
-#         response = requests.get(
-#             f"https://huggingface.co/api/datasets?search={keyword}&limit={limit}"
-#         )
-#         response.raise_for_status()
-#         datasets = response.json()
-#         # Return message if no datasets found
 #         if not datasets:
-#             return f"No medical datasets found for '{arg1}'."
-#         # Collect and return dataset names
 #         results = [f"- {ds.get('id', 'Unknown')}" for ds in datasets[:limit]]
 #         return f"Medical datasets related to '{arg1}':\n" + "\n".join(results)
 #     except Exception as e:
 #         return f"Error searching medical datasets for '{arg1}': {str(e)}"
 @tool
 def my_custom_tool(arg1: str, arg2: int) -> str:
     """
@@ -121,21 +261,15 @@ def my_custom_tool(arg1: str, arg2: int) -> str:
         keyword = arg1.strip().lower()
         limit = int(arg2)
-        # Define a list of medical terms
         medical_terms = [
-            "skin", "brain", "lung", "chest", "abdomen", "spine", "bone", "heart", "liver", "kidney",
-            "bladder", "stomach", "colon", "rectum", "esophagus", "pancreas", "breast", "ear", "eye",
-            "radiology", "pathology", "oncology", "cardiology", "neurology", "dermatology", "dentistry",
-            "ophthalmology", "urology", "orthopedics", "gastroenterology", "pulmonology", "nephrology",
-            "psychiatry", "pediatrics", "geriatrics", "infectious disease",
-            "mri", "ct", "xray", "x-ray", "ultrasound", "pet", "fmri", "mammo", "angiography", "radiography",
-            "cancer", "tumor", "stroke", "diabetes", "melanoma", "eczema", "asthma", "thyroid"
         ]
         if not any(term in keyword for term in medical_terms):
             return f"No medical datasets found for '{arg1}'. Please try another medical term."
-        # Try online query to Hugging Face
         try:
             response = requests.get(
                 f"https://huggingface.co/api/datasets?search={keyword}&limit={limit}",
@@ -144,10 +278,8 @@ def my_custom_tool(arg1: str, arg2: int) -> str:
             response.raise_for_status()
             datasets = response.json()
         except Exception:
-            # Network-restricted fallback
             datasets = [{"id": f"example/{keyword}-dataset-{i+1}"} for i in range(limit)]
-        # Return formatted list
         if not datasets:
             return f"No datasets found for '{arg1}'."
@@ -158,17 +290,13 @@ def my_custom_tool(arg1: str, arg2: int) -> str:
         return f"Error searching medical datasets for '{arg1}': {str(e)}"
 @tool
 def get_current_time_in_timezone(timezone: str) -> str:
     """
-    A tool that fetches the current local time in a specified timezone.
     Args:
         timezone: A string representing a valid timezone (e.g., 'America/New_York').
     Returns:
         A string showing the current local time in the specified timezone.
     """
@@ -179,43 +307,27 @@ def get_current_time_in_timezone(timezone: str) -> str:
     except Exception as e:
         return f"Error fetching time for timezone '{timezone}': {str(e)}"
-final_answer = FinalAnswerTool()
-# AI Model
-# model = InferenceClientModel(
-#     model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
-#     temperature=0.5,
-#     max_output_tokens=2048  # optional, safe alternative
-# )
 model = InferenceClientModel(
     model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
-    temperature=0.5,
 )
-# Load tool from hub
-# image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
-# Load prompt templates
 with open("prompts.yaml", 'r') as stream:
     prompt_templates = yaml.safe_load(stream)
-# # Create the agent
-# agent = CodeAgent(
-#     model=model,
-#     tools=[final_answer, get_current_time_in_timezone, my_custom_tool],
-#     max_steps=6,
-#     verbosity_level=2,
-#     planning_interval=None,
-#     name=None,
-#     description=None,
-#     prompt_templates=prompt_templates
-# )
 agent = CodeAgent(
     model=model,
@@ -224,14 +336,12 @@ agent = CodeAgent(
     verbosity_level=1,
     planning_interval=None,
     name="MedDataSearchAgent",
-    description=(
-        "An intelligent agent that searches Hugging Face datasets related to "
-        "medical conditions, body parts, and imaging modalities. "
-        "Use 'my_custom_tool' whenever the user requests medical data or datasets."
-    ),
     prompt_templates=prompt_templates
 )
-# Launch the UI
 GradioUI(agent).launch()

+# from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel, load_tool, tool
+# import datetime
+# import requests
+# import pytz
+# import yaml
+# from tools.final_answer import FinalAnswerTool
+# from Gradio_UI import GradioUI
+# # Custom Tool to fetch datasets related to body parts or imaging types
+# # @tool
+# # def my_custom_tool(arg1: str, arg2: int) -> str:
+# #     """
+# #     Search and retrieve publicly available medical datasets from Hugging Face based on any medical-related keyword.
+# #     Args:
+# #         arg1: A keyword related to medical data (e.g., 'cancer', 'diabetes', 'CT scan', 'radiology', 'dermoscopy').
+# #         arg2: The maximum number of datasets to retrieve.
+# #     Returns:
+# #         A list of dataset names matching the search query, or a message stating that no datasets were found.
+# #     """
+# #     try:
+# #         keyword = arg1.strip().lower()
+# #         limit = int(arg2)
+# #         # Define a basic list of medically relevant terms
+# #         medical_terms = [
+# #             # Anatomy / Body Parts
+# #             "skin", "brain", "lung", "chest", "abdomen", "spine", "bone", "heart", "liver", "kidney",
+# #             "bladder", "stomach", "colon", "rectum", "esophagus", "pancreas", "breast", "ear", "eye",
+# #             "retina", "tooth", "teeth", "tongue", "jaw", "neck", "wrist", "hand", "leg", "arm", "shoulder", "pelvis",
+# #             # Diseases / Conditions
+# #             "cancer", "tumor", "stroke", "diabetes", "pneumonia", "covid", "asthma", "eczema", "melanoma",
+# #             "hypertension", "alzheimer", "parkinson", "arthritis", "scoliosis", "epilepsy", "glaucoma",
+# #             "ulcer", "hepatitis", "leukemia", "lymphoma", "tuberculosis", "anemia", "obesity", "depression",
+# #             "anxiety", "bipolar", "autism", "adhd", "ptsd", "psychosis", "schizophrenia",
+# #             # Imaging Modalities
+# #             "mri", "ct", "xray", "x-ray", "ultrasound", "pet", "fmri", "mammo", "angiography", "radiography",
+# #             "echocardiogram", "spect", "dermoscopy", "colonoscopy", "endoscopy", "biopsy", "histopathology",
+# #             # Medical Specialties
+# #             "radiology", "pathology", "oncology", "cardiology", "neurology", "dermatology", "dentistry",
+# #             "ophthalmology", "urology", "orthopedics", "gastroenterology", "pulmonology", "nephrology",
+# #             "psychiatry", "pediatrics", "geriatrics", "infectious disease",
+# #             # Symptoms / Signs
+# #             "lesion", "infection", "fever", "pain", "inflammation", "rash", "headache", "swelling",
+# #             "cough", "seizure", "dizziness", "vomiting", "diarrhea", "nausea", "fatigue", "itching",
+# #             # Common Specific Diseases
+# #             "breast cancer", "prostate cancer", "lung cancer", "skin cancer", "colon cancer",
+# #             "brain tumor", "liver cancer", "cervical cancer", "bladder cancer", "thyroid cancer",
+# #             # Procedures / Interventions
+# #             "surgery", "chemotherapy", "radiation", "transplant", "dialysis", "intubation", "stenting",
+# #             "ventilation", "vaccination", "anesthesia", "rehabilitation", "prosthetics", "orthotics",
+# #             # Lab Tests / Biomarkers
+# #             "blood test", "cbc", "glucose", "hemoglobin", "cholesterol", "biomarker", "urinalysis",
+# #             "pcr", "serology", "antibody", "antigen",
+# #             # Clinical Settings / Roles
+# #             "icu", "hospital", "emergency", "clinical notes", "nursing", "physician", "patient",
+# #             "medical record", "electronic health record", "ehr", "vitals",
+# #             # Age-based Terms
+# #             "pediatric", "neonatal", "infant", "child", "adolescent", "geriatrics", "elderly",
+# #             # Epidemiology / Public Health
+# #             "epidemiology", "prevalence", "incidence", "mortality", "public health", "health disparity",
+# #             "risk factor", "social determinant",
+# #             # Pharmacology / Medications
+# #             "drug", "medication", "pharmacology", "side effect", "adverse event", "dose", "tablet",
+# #             "vaccine", "clinical trial", "placebo"
+# #         ]
+# #         # Check if keyword is in known medical terms
+# #         if not any(term in keyword for term in medical_terms):
+# #             return f"No medical datasets found for '{arg1}'."
+# #         # Fetch datasets from Hugging Face
+# #         response = requests.get(
+# #             f"https://huggingface.co/api/datasets?search={keyword}&limit={limit}"
+# #         )
+# #         response.raise_for_status()
+# #         datasets = response.json()
+# #         # Return message if no datasets found
+# #         if not datasets:
+# #             return f"No medical datasets found for '{arg1}'."
+# #         # Collect and return dataset names
+# #         results = [f"- {ds.get('id', 'Unknown')}" for ds in datasets[:limit]]
+# #         return f"Medical datasets related to '{arg1}':\n" + "\n".join(results)
+# #     except Exception as e:
+# #         return f"Error searching medical datasets for '{arg1}': {str(e)}"
 # @tool
 # def my_custom_tool(arg1: str, arg2: int) -> str:
 #         keyword = arg1.strip().lower()
 #         limit = int(arg2)
+#         # Define a list of medical terms
 #         medical_terms = [
 #             "skin", "brain", "lung", "chest", "abdomen", "spine", "bone", "heart", "liver", "kidney",
+#             "bladder", "stomach", "colon", "rectum", "esophagus", "pancreas", "breast", "ear", "eye",
 #             "radiology", "pathology", "oncology", "cardiology", "neurology", "dermatology", "dentistry",
 #             "ophthalmology", "urology", "orthopedics", "gastroenterology", "pulmonology", "nephrology",
 #             "psychiatry", "pediatrics", "geriatrics", "infectious disease",
+#             "mri", "ct", "xray", "x-ray", "ultrasound", "pet", "fmri", "mammo", "angiography", "radiography",
+#             "cancer", "tumor", "stroke", "diabetes", "melanoma", "eczema", "asthma", "thyroid"
 #         ]
 #         if not any(term in keyword for term in medical_terms):
+#             return f"No medical datasets found for '{arg1}'. Please try another medical term."
+#         # Try online query to Hugging Face
+#         try:
+#             response = requests.get(
+#                 f"https://huggingface.co/api/datasets?search={keyword}&limit={limit}",
+#                 timeout=10
+#             )
+#             response.raise_for_status()
+#             datasets = response.json()
+#         except Exception:
+#             # Network-restricted fallback
+#             datasets = [{"id": f"example/{keyword}-dataset-{i+1}"} for i in range(limit)]
+#         # Return formatted list
 #         if not datasets:
+#             return f"No datasets found for '{arg1}'."
 #         results = [f"- {ds.get('id', 'Unknown')}" for ds in datasets[:limit]]
 #         return f"Medical datasets related to '{arg1}':\n" + "\n".join(results)
 #     except Exception as e:
 #         return f"Error searching medical datasets for '{arg1}': {str(e)}"
+# @tool
+# def get_current_time_in_timezone(timezone: str) -> str:
+#     """
+#     A tool that fetches the current local time in a specified timezone.
+#     Args:
+#         timezone: A string representing a valid timezone (e.g., 'America/New_York').
+#     Returns:
+#         A string showing the current local time in the specified timezone.
+#     """
+#     try:
+#         tz = pytz.timezone(timezone)
+#         local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
+#         return f"The current local time in {timezone} is: {local_time}"
+#     except Exception as e:
+#         return f"Error fetching time for timezone '{timezone}': {str(e)}"
+# final_answer = FinalAnswerTool()
+# # AI Model
+# # model = InferenceClientModel(
+# #     model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
+# #     temperature=0.5,
+# #     max_output_tokens=2048  # optional, safe alternative
+# # )
+# model = InferenceClientModel(
+#     model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
+#     temperature=0.5,
+# )
+# # Load tool from hub
+# # image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
+# # Load prompt templates
+# with open("prompts.yaml", 'r') as stream:
+#     prompt_templates = yaml.safe_load(stream)
+# # # Create the agent
+# # agent = CodeAgent(
+# #     model=model,
+# #     tools=[final_answer, get_current_time_in_timezone, my_custom_tool],
+# #     max_steps=6,
+# #     verbosity_level=2,
+# #     planning_interval=None,
+# #     name=None,
+# #     description=None,
+# #     prompt_templates=prompt_templates
+# # )
+# agent = CodeAgent(
+#     model=model,
+#     tools=[final_answer, get_current_time_in_timezone, my_custom_tool],
+#     max_steps=6,
+#     verbosity_level=1,
+#     planning_interval=None,
+#     name="MedDataSearchAgent",
+#     description=(
+#         "An intelligent agent that searches Hugging Face datasets related to "
+#         "medical conditions, body parts, and imaging modalities. "
+#         "Use 'my_custom_tool' whenever the user requests medical data or datasets."
+#     ),
+#     prompt_templates=prompt_templates
+# )
+# # Launch the UI
+# GradioUI(agent).launch()
+# app.py
+from smolagents import CodeAgent, InferenceClientModel, load_tool, tool
+import datetime
+import requests
+import pytz
+import yaml
+from tools.final_answer import FinalAnswerTool
+from Gradio_UI import GradioUI
 @tool
 def my_custom_tool(arg1: str, arg2: int) -> str:
     """
         keyword = arg1.strip().lower()
         limit = int(arg2)
         medical_terms = [
+            "skin", "brain", "lung", "chest", "abdomen", "spine", "bone", "heart", "liver",
+            "radiology", "pathology", "oncology", "dermatology", "mri", "ct", "xray", "ultrasound",
+            "cancer", "tumor", "melanoma", "eczema", "thyroid"
         ]
         if not any(term in keyword for term in medical_terms):
             return f"No medical datasets found for '{arg1}'. Please try another medical term."
         try:
             response = requests.get(
                 f"https://huggingface.co/api/datasets?search={keyword}&limit={limit}",
             response.raise_for_status()
             datasets = response.json()
         except Exception:
             datasets = [{"id": f"example/{keyword}-dataset-{i+1}"} for i in range(limit)]
         if not datasets:
             return f"No datasets found for '{arg1}'."
         return f"Error searching medical datasets for '{arg1}': {str(e)}"
 @tool
 def get_current_time_in_timezone(timezone: str) -> str:
     """
+    Get the current local time in a specified timezone.
     Args:
         timezone: A string representing a valid timezone (e.g., 'America/New_York').
     Returns:
         A string showing the current local time in the specified timezone.
     """
     except Exception as e:
         return f"Error fetching time for timezone '{timezone}': {str(e)}"
+final_answer = FinalAnswerTool()
 model = InferenceClientModel(
     model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
+    temperature=0.5
 )
 with open("prompts.yaml", 'r') as stream:
     prompt_templates = yaml.safe_load(stream)
+# --- 🔥 Critical: Enforce dataset search behavior ---
+SYSTEM_PROMPT_APPEND = """
+Whenever the user query includes medical conditions (like cancer, tumor, radiology, MRI, CT, ultrasound, pathology, or skin),
+you MUST call the `my_custom_tool` function to search Hugging Face datasets instead of writing an explanation.
+Always call it with arguments (arg1=<the keyword>, arg2=5).
+Do NOT attempt to scrape websites, import modules, or fetch random text.
+If the query is not medical, behave normally.
+"""
 agent = CodeAgent(
     model=model,
     verbosity_level=1,
     planning_interval=None,
     name="MedDataSearchAgent",
+    description="An intelligent agent that searches Hugging Face for medical datasets and returns structured results.",
     prompt_templates=prompt_templates
 )
+# Inject custom enforcement into system prompt
+agent.prompt_templates["system_prompt"] += "\n" + SYSTEM_PROMPT_APPEND
 GradioUI(agent).launch()