Spaces:

OctoTools
/

octotools

Runtime error

App Files Files Community

octotools / opentools /tools /generalist_solution_generator /tool.py

bowenchen118

Update

d2beadd 9 months ago

raw

history blame

8.47 kB

	import os
	from opentools.tools.base import BaseTool
	from opentools.engine.openai import ChatOpenAI

	class Generalist_Solution_Generator_Tool(BaseTool):
	    """Tool that sends a prompt, optionally with an image, to a chat LLM.

	    The tool registers its description, input/output types, demo commands
	    and usage guidance with ``BaseTool`` and answers queries through a
	    ``ChatOpenAI`` engine that is created on every ``execute`` call.
	    """

	    # Class-level flag; it is not read anywhere in this file.
	    # NOTE(review): presumably consumed by the surrounding tool framework
	    # to decide whether an LLM engine must be available - confirm.
	    require_llm_engine = True

	    def __init__(self, model_string="gpt-4o-mini"):
	        """Register the tool metadata and remember the model to use.

	        Args:
	            model_string: Model identifier passed to ``ChatOpenAI`` each
	                time ``execute`` runs.
	        """
	        super().__init__(
	            tool_name="Generalist_Solution_Generator_Tool",
	            tool_description="A generalized tool that takes query from the user as prompt, and answers the question step by step to the best of its ability. It can also accept an image.",
	            tool_version="1.0.0",
	            input_types={
	                "prompt": "str - The prompt that includes query from the user to guide the agent to generate response (Examples: 'Describe this image in detail').",
	                "image": "str - The path to the image file if applicable (default: None).",
	            },
	            output_type="str - The generated response to the original query prompt",
	            demo_commands=[
	                {
	                    "command": 'execution = tool.execute(prompt="Summarize the following text in a few lines")',
	                    "description": "Generate a short summary given the prompt from the user."
	                },
	                {
	                    "command": 'execution = tool.execute(prompt="Explain the mood of this scene.", image="path/to/image1.png")',
	                    "description": "Generate a caption focusing on the mood using a specific prompt and image."
	                },
	                {
	                    "command": 'execution = tool.execute(prompt="Give your best coordinate estimate for the pacemaker in the image and return (x1, y1, x2, y2)", image="path/to/image2.png")',
	                    "description": "Generate bounding box coordinates given the image and prompt from the user. The format should be (x1, y1, x2, y2)."
	                },
	                {
	                    "command": 'execution = tool.execute(prompt="Is the number of tiny objects that are behind the small metal jet less than the number of tiny things left of the tiny sedan?", image="path/to/image2.png")',
	                    "description": "Answer a question step by step given the image."
	                }
	            ],
	            # The commented-out variants below are earlier/alternative
	            # wordings of the guidance, kept together with the scores that
	            # were recorded for them. Only "version 2" is active.
	            #
	            # # version 0 (bowen) (Generalist: %; 6 Tools: %; Generalist + 6 Tools: %)
	            # user_metadata = {
	            # "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
	            # "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge. For optimal results: 1) Provide clear, specific prompts. 2) Use it as a starting point for complex tasks, then refine with specialized tools. 3) Verify important information from its responses. 4) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
	            # }
	            # version 2 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 54%)
	            user_metadata = {
	                "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
	                "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
	                "1) Provide clear, specific prompts.\n"
	                "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
	                "3) For complex queries, break them down into subtasks and use the tool multiple times.\n"
	                "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
	                "5) Verify important information from its responses.\n"
	                "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
	            }
	            # # version 6 (Generalist: 70%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
	            # user_metadata = {
	            # "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
	            # "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
	            # "1) Provide clear, specific prompts.\n"
	            # "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
	            # "3) For complex queries, break them down into smaller, focused sub-tasks and use the tool multiple times.\n"
	            # "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
	            # "5) Verify important information from its responses.\n"
	            # "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
	            # }
	            # # version 8 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
	            # user_metadata = {
	            # "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
	            # "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
	            # "1) Provide clear, specific prompts.\n"
	            # "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
	            # "3) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
	            # "4) Verify important information from its responses.\n"
	            # "5) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
	            # }
	        )
	        self.model_string = model_string

	    def execute(self, prompt, image=None):
	        """Answer ``prompt`` with the configured model, optionally using an image.

	        Failures are reported through the return value rather than raised:
	        every error path yields a string starting with ``"Error"``.

	        Args:
	            prompt: Text passed to the engine.
	            image: Path to an image file. Any falsy value (``None``, ``""``)
	                selects the text-only path.

	        Returns:
	            Whatever the engine call returns on success, otherwise one of
	            ``"Error: Invalid image file path."``,
	            ``"Error reading image file: ..."`` or
	            ``"Error generating response: ..."``.
	        """
	        print(f"\nInitializing Generalist Tool with model: {self.model_string}")
	        multimodal = bool(image)

	        try:
	            image_bytes = None
	            if multimodal:
	                # Validate and load the image before building the engine so
	                # a bad path is rejected without any engine setup.
	                if not os.path.isfile(image):
	                    return "Error: Invalid image file path."
	                try:
	                    with open(image, 'rb') as file:
	                        image_bytes = file.read()
	                except Exception as e:
	                    return f"Error reading image file: {str(e)}"

	            # Built inside the try block so that a construction failure is
	            # reported as an error string like every other failure here.
	            llm_engine = ChatOpenAI(model_string=self.model_string, is_multimodal=multimodal)

	            if multimodal:
	                # Multimodal calls receive [prompt, raw image bytes].
	                return llm_engine([prompt, image_bytes])
	            # Text-only calls receive the bare prompt string.
	            return llm_engine(prompt)
	        except Exception as e:
	            return f"Error generating response: {str(e)}"

	    def get_metadata(self):
	        """Return the metadata produced by the base class, unchanged."""
	        metadata = super().get_metadata()
	        return metadata

	if __name__ == "__main__":
	    # Manual smoke test. Run it from the terminal with:
	    #
	    #     cd opentools
	    #     python tools/generalist_solution_generator/tool.py

	    # Get the directory of the current script
	    script_dir = os.path.dirname(os.path.abspath(__file__))
	    print(f"Script directory: {script_dir}")

	    # Example usage of the Generalist_Tool
	    tool = Generalist_Solution_Generator_Tool()
	    # tool = Generalist_Solution_Generator_Tool(model_string="gpt-4o-mini")
	    # tool = Generalist_Solution_Generator_Tool(model_string="gpt-4o")

	    # Get tool metadata
	    metadata = tool.get_metadata()
	    print(metadata)

	    # Construct the full path to the image using the script's directory.
	    # An earlier location, "../../tasks/minitoolbench/data/mathvista_113.png",
	    # used to be assigned first and was immediately overwritten; only the
	    # path that actually took effect is kept.
	    relative_image_path = "examples/mathvista_113.png"
	    image_path = os.path.join(script_dir, relative_image_path)
	    prompt = "Describe the image in detail."

	    # Execute the tool with default prompt
	    try:
	        execution = tool.execute(prompt=prompt, image=image_path)
	        # execution = tool.execute(prompt=prompt)
	        print("Generated Response:")
	        print(execution)
	    except Exception as e:
	        print(f"Execution failed: {e}")

	    print("Done!")