Spaces: Runtime error

Commit 6b8dbdd · Parent(s): f326b43

Change to user-provided API keys

Files changed:
- app.py +24 -49
- opentools/engine/openai.py +3 -2
- opentools/models/executor.py +9 -6
- opentools/models/initializer.py +9 -4
- opentools/models/planner.py +7 -15
- opentools/models/utlis.py +0 -73
- opentools/setup.py +0 -20
- opentools/tools/generalist_solution_generator/tool.py +4 -2
- setup.py +0 -20
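In short: the commit replaces the environment-variable key lookup with an api_key argument that is threaded from a new password textbox in the Gradio UI down through Initializer, Planner, Executor, ChatOpenAI, and any tool class that sets require_api_key. Below is a minimal sketch of that pattern, abbreviated from the diffs that follow; these are not the project's full class definitions.

# Minimal sketch of the key-threading pattern (abbreviated from the
# diffs below; the real classes take many more arguments).

class ChatOpenAI:
    def __init__(self, model_string: str, is_multimodal: bool = False, api_key: str = None):
        if api_key is None:
            # Before this commit, the engine read os.getenv("OPENAI_API_KEY") here.
            raise ValueError("An API key is required.")
        self.model_string = model_string
        self.is_multimodal = is_multimodal
        self.api_key = api_key

class Planner:
    def __init__(self, llm_engine_name: str, api_key: str = None):
        # The user's key is forwarded to every engine the planner constructs.
        self.llm_engine = ChatOpenAI(model_string=llm_engine_name, api_key=api_key)

def solve_problem_gradio(user_query, user_image, api_key):
    # `api_key` arrives per request from the password textbox in the UI,
    # rather than being read once from the environment at startup.
    planner = Planner(llm_engine_name="gpt-4o-mini", api_key=api_key)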
app.py CHANGED

@@ -18,9 +18,7 @@ from opentools.models.initializer import Initializer
 from opentools.models.planner import Planner
 from opentools.models.memory import Memory
 from opentools.models.executor import Executor
-from opentools.models.utlis import make_json_serializable
-
-solver = None
+from opentools.models.utils import make_json_serializable
 
 class ChatMessage:
     def __init__(self, role: str, content: str, metadata: dict = None):

@@ -63,7 +61,7 @@ class Solver:
 
 
 
-    def stream_solve_user_problem(self, user_query: str, user_image: Image.Image, messages: List[ChatMessage]) -> Iterator[List[ChatMessage]]:
+    def stream_solve_user_problem(self, user_query: str, user_image: Image.Image, api_key: str, messages: List[ChatMessage]) -> Iterator[List[ChatMessage]]:
         """
         Streams intermediate thoughts and final responses for the problem-solving process based on user input.
 

@@ -198,39 +196,28 @@ def parse_arguments():
     return parser.parse_args()
 
 
-def solve_problem_gradio(user_query, user_image):
+def solve_problem_gradio(user_query, user_image, api_key):
     """
     Wrapper function to connect the solver to Gradio.
     Streams responses from `solver.stream_solve_user_problem` for real-time UI updates.
     """
-
-
-    if solver is None:
-        return [["assistant", "⚠️ Error: Solver is not initialized. Please restart the application."]]
-
-    messages = []  # Initialize message list
-    for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
-        yield [[msg.role, msg.content] for msg in message_batch]  # Ensure correct format for Gradio Chatbot
-
-
-
-def main(args):
-    global solver
+
     # Initialize Tools
     enabled_tools = args.enabled_tools.split(",") if args.enabled_tools else []
 
-
     # Instantiate Initializer
     initializer = Initializer(
         enabled_tools=enabled_tools,
-        model_string=args.llm_engine_name
+        model_string=args.llm_engine_name,
+        api_key=api_key
     )
 
     # Instantiate Planner
     planner = Planner(
         llm_engine_name=args.llm_engine_name,
         toolbox_metadata=initializer.toolbox_metadata,
-        available_tools=initializer.available_tools
+        available_tools=initializer.available_tools,
+        api_key=api_key
    )
 
     # Instantiate Memory

@@ -240,7 +227,8 @@ def main(args):
     executor = Executor(
         llm_engine_name=args.llm_engine_name,
         root_cache_dir=args.root_cache_dir,
-        enable_signal=False
+        enable_signal=False,
+        api_key=api_key
     )
 
     # Instantiate Solver

@@ -258,44 +246,31 @@ def main(args):
         root_cache_dir=args.root_cache_dir
     )
 
-
-
-    # user_image_path = "/home/sheng/toolbox-agent/mathvista_113.png" # Replace with your actual image path
-
-    # # Load the image as a PIL object
-    # user_image = Image.open(user_image_path).convert("RGB") # Ensure it's in RGB mode
-
-    # print("\n=== Starting Problem Solving ===\n")
-    # messages = []
-    # for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
-    #     for message in message_batch:
-    #         print(f"{message.role}: {message.content}")
-
-    # messages = []
-    # solver.stream_solve_user_problem(user_query, user_image, messages)
-
+    if solver is None:
+        return [["assistant", "⚠️ Error: Solver is not initialized. Please restart the application."]]
 
-    #
-
+    messages = []  # Initialize message list
+    for message_batch in solver.stream_solve_user_problem(user_query, user_image, api_key, messages):
+        yield [[msg.role, msg.content] for msg in message_batch]  # Ensure correct format for Gradio Chatbot
 
-    # for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
-    #     yield message_batch # Stream messages correctly in tuple format
 
-    # solve_problem_stream(user_query, user_image)
 
+def main(args):
     # ========== Gradio Interface ==========
     with gr.Blocks() as demo:
         gr.Markdown("# 🧠 OctoTools AI Solver") # Title
 
         with gr.Row():
-
-
-
-
-
+            with gr.Column(scale=1, min_width=300):
+                user_query = gr.Textbox(label="Enter your query", placeholder="Type your question here...")
+                api_key = gr.Textbox(label="API Key", placeholder="Your API key will not be stored in any way.", type="password")
+                user_image = gr.Image(type="pil", label="Upload an image") # Accepts multiple formats
+                run_button = gr.Button("Run") # Run button
+            with gr.Column(scale=3, min_width=300):
+                chatbot_output = gr.Chatbot(label="Problem-Solving Output")
 
         # Link button click to function
-        run_button.click(fn=solve_problem_gradio, inputs=[user_query, user_image], outputs=chatbot_output)
+        run_button.click(fn=solve_problem_gradio, inputs=[user_query, user_image, api_key], outputs=chatbot_output)
 
     # Launch the Gradio app
     demo.launch()
opentools/engine/openai.py CHANGED

@@ -43,6 +43,7 @@ class ChatOpenAI(EngineLM, CachedEngine):
                  is_multimodal: bool=False,
                  # enable_cache: bool=True,
                  enable_cache: bool=False, # NOTE: disable cache for now
+                 api_key: str=None,
                  **kwargs):
         """
         :param model_string:

@@ -61,11 +62,11 @@ class ChatOpenAI(EngineLM, CachedEngine):
         super().__init__(cache_path=cache_path)
 
         self.system_prompt = system_prompt
-        if os.getenv("OPENAI_API_KEY") is None:
+        if api_key is None:
             raise ValueError("Please set the OPENAI_API_KEY environment variable if you'd like to use OpenAI models.")
 
         self.client = OpenAI(
-            api_key=os.getenv("OPENAI_API_KEY"),
+            api_key=api_key,
         )
         self.model_string = model_string
         self.is_multimodal = is_multimodal
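After this change a caller must pass the key explicitly; a hypothetical call site would look like the sketch below (user_supplied_key is an assumed name for whatever value the UI collected).

# Hypothetical call site after this commit; the engine no longer falls
# back to the OPENAI_API_KEY environment variable.
user_supplied_key = "sk-..."  # e.g. the value of the Gradio password textbox
engine = ChatOpenAI(
    model_string="gpt-4o-mini",
    is_multimodal=False,
    api_key=user_supplied_key,
)

One side effect visible in the hunk: the ValueError text still instructs users to set OPENAI_API_KEY, even though the constructor now checks only the api_key argument.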
opentools/models/executor.py CHANGED

@@ -18,13 +18,14 @@ def timeout_handler(signum, frame):
     raise TimeoutError("Function execution timed out")
 
 class Executor:
-    def __init__(self, llm_engine_name: str, root_cache_dir: str = "solver_cache", num_threads: int = 1, max_time: int = 120, max_output_length: int = 100000, enable_signal: bool = True):
+    def __init__(self, llm_engine_name: str, root_cache_dir: str = "solver_cache", num_threads: int = 1, max_time: int = 120, max_output_length: int = 100000, enable_signal: bool = True, api_key: str = None):
         self.llm_engine_name = llm_engine_name
         self.root_cache_dir = root_cache_dir
         self.num_threads = num_threads
         self.max_time = max_time
         self.max_output_length = max_output_length
         self.enable_signal = enable_signal
+        self.api_key = api_key
 
     def set_query_cache_dir(self, query_cache_dir):
         if query_cache_dir:

@@ -130,7 +131,7 @@ Reason: The command should process multiple items in a single execution, not sep
 Remember: Your <command> field MUST be valid Python code including any necessary data preparation steps and one or more `execution = tool.execute(` calls, without any additional explanatory text. The format `execution = tool.execute` must be strictly followed, and the last line must begin with `execution = tool.execute` to capture the final output.
 """
 
-        llm_generate_tool_command = ChatOpenAI(model_string=self.llm_engine_name, is_multimodal=False)
+        llm_generate_tool_command = ChatOpenAI(model_string=self.llm_engine_name, is_multimodal=False, api_key=self.api_key)
         tool_command = llm_generate_tool_command(prompt_generate_tool_command, response_format=ToolCommand)
 
         return tool_command

@@ -207,12 +208,14 @@ Remember: Your <command> field MUST be valid Python code including any necessary
 
         # Check if the tool requires an LLM engine
         # NOTE FIXME may need to refine base.py and tool.py to handle this better
+        inputs = {}
         if getattr(tool_class, 'require_llm_engine', False):
             # Instantiate the tool with the model_string
-            tool = tool_class(model_string=self.llm_engine_name)
-        else:
-            # Instantiate the tool
-            tool = tool_class()
+            inputs['model_string'] = self.llm_engine_name
+        if getattr(tool_class, 'require_api_key', False):
+            # Instantiate the tool with the api_key
+            inputs['api_key'] = self.api_key
+        tool = tool_class(**inputs)
 
         # Set the custom output directory
         # NOTE FIXME: May have a better way to handle this
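The new inputs dict builds the tool's constructor kwargs from class-level opt-in flags instead of branching on a single case, so tools can independently request a model string, an API key, both, or neither. A sketch of the protocol, with MyTool as a hypothetical tool (the real opt-in example in this commit is Generalist_Solution_Generator_Tool below):

# Sketch of the opt-in protocol the executor now uses; MyTool is hypothetical.
class MyTool:
    require_llm_engine = True   # executor supplies inputs['model_string']
    require_api_key = True      # executor supplies inputs['api_key']

    def __init__(self, model_string=None, api_key=None):
        self.model_string = model_string
        self.api_key = api_key

# Executor side, mirroring the hunk above:
tool_class = MyTool
inputs = {}
if getattr(tool_class, 'require_llm_engine', False):
    inputs['model_string'] = "gpt-4o-mini"
if getattr(tool_class, 'require_api_key', False):
    inputs['api_key'] = "sk-..."  # the user-provided key
tool = tool_class(**inputs)       # tools that opt out get a bare constructor call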
opentools/models/initializer.py CHANGED

@@ -7,11 +7,12 @@ from typing import Dict, Any, List, Tuple
 
 
 class Initializer:
-    def __init__(self, enabled_tools: List[str] = [], model_string: str = None):
+    def __init__(self, enabled_tools: List[str] = [], model_string: str = None, api_key: str = None):
         self.toolbox_metadata = {}
         self.available_tools = []
         self.enabled_tools = enabled_tools
         self.model_string = model_string # llm model string
+        self.api_key = api_key
 
         print("\nInitializing OpenTools...")
         print(f"Enabled tools: {self.enabled_tools}")

@@ -64,10 +65,14 @@ class Initializer:
                 # print(f"Class __dict__: {obj.__dict__}")
                 try:
                     # Check if the tool requires an LLM engine
+                    inputs = {}
                     if hasattr(obj, 'require_llm_engine') and obj.require_llm_engine:
-                        tool_instance = obj(model_string=self.model_string)
-                    else:
-                        tool_instance = obj()
+                        inputs['model_string'] = self.model_string
+
+                    if hasattr(obj, 'require_api_key') and obj.require_api_key:
+                        inputs['api_key'] = self.api_key
+
+                    tool_instance = obj(**inputs)
 
                     # print(f"\nInstance attributes: {dir(tool_instance)}")
                     # print(f"\nInstance __dict__: {tool_instance.__dict__}")
opentools/models/planner.py CHANGED

@@ -9,10 +9,10 @@ from opentools.models.memory import Memory
 from opentools.models.formatters import QueryAnalysis, NextStep, MemoryVerification
 
 class Planner:
-    def __init__(self, llm_engine_name: str, toolbox_metadata: dict = None, available_tools: List = None):
+    def __init__(self, llm_engine_name: str, toolbox_metadata: dict = None, available_tools: List = None, api_key: str = None):
         self.llm_engine_name = llm_engine_name
-        self.llm_engine_mm = ChatOpenAI(model_string=llm_engine_name, is_multimodal=True)
-        self.llm_engine = ChatOpenAI(model_string=llm_engine_name, is_multimodal=False)
+        self.llm_engine_mm = ChatOpenAI(model_string=llm_engine_name, is_multimodal=True, api_key=api_key)
+        self.llm_engine = ChatOpenAI(model_string=llm_engine_name, is_multimodal=False, api_key=api_key)
         self.toolbox_metadata = toolbox_metadata if toolbox_metadata is not None else {}
         self.available_tools = available_tools if available_tools is not None else []
 

@@ -47,13 +47,10 @@ class Planner:
         return image_info
 
     def generate_base_response(self, question: str, image: str, max_tokens: str = 4000, bytes_mode: bool = False) -> str:
-        if bytes_mode:
-            image_info = self.get_image_info_bytes(image)
-        else:
-            image_info = self.get_image_info(image)
+        image_info = self.get_image_info(image)
 
         input_data = [question]
-        if image_info and "image_path" in image_info
+        if image_info and "image_path" in image_info:
             try:
                 with open(image_info["image_path"], 'rb') as file:
                     image_bytes = file.read()

@@ -66,10 +63,7 @@ class Planner:
         return self.base_response
 
     def analyze_query(self, question: str, image: str, bytes_mode: bool = False) -> str:
-        if bytes_mode:
-            image_info = self.get_image_info_bytes(image)
-        else:
-            image_info = self.get_image_info(image)
+        image_info = self.get_image_info(image)
         print("image_info: ", image_info)
 
         query_prompt = f"""

@@ -100,9 +94,7 @@ Please present your analysis in a clear, structured format.
 """
 
         input_data = [query_prompt]
-        if bytes_mode:
-            image_bytes = image
-        else:
+        if image_info and "image_path" in image_info:
             try:
                 with open(image_info["image_path"], 'rb') as file:
                     image_bytes = file.read()
opentools/models/utlis.py DELETED

@@ -1,73 +0,0 @@
-# import json
-
-# def truncate_result(result, max_length: int = 100000, truncation_indicator: str = "...") -> str:
-#     """
-#     Truncate the result to specified length while preserving JSON structure when possible.
-
-#     Args:
-#         result: The result to truncate (can be str, list, dict, or other types)
-#         max_length: Maximum length of the output string (default: 1000)
-#         truncation_indicator: String to indicate truncation (default: "...")
-
-#     Returns:
-#         str: Truncated string representation of the result
-#     """
-#     if isinstance(result, (dict, list)):
-#         try:
-#             result_str = json.dumps(result, ensure_ascii=False)
-#         except:
-#             result_str = str(result)
-#     else:
-#         result_str = str(result)
-
-#     indicator_length = len(truncation_indicator)
-
-#     if len(result_str) > max_length:
-#         # For JSON-like strings, try to find the last complete structure
-#         if result_str.startswith('{') or result_str.startswith('['):
-#             # Find last complete element
-#             pos = max_length - indicator_length
-#             while pos > 0 and not (
-#                 result_str[pos] in ',]}' and
-#                 result_str[pos:].count('"') % 2 == 0
-#             ):
-#                 pos -= 1
-#             if pos > 0:
-#                 return result_str[:pos + 1] + truncation_indicator
-
-#     # Default truncation if not JSON or no suitable truncation point found
-#     return result_str[:max_length - indicator_length] + truncation_indicator
-
-#     return result_str
-
-def make_json_serializable(obj):
-    if isinstance(obj, (str, int, float, bool, type(None))):
-        return obj
-    elif isinstance(obj, dict):
-        return {make_json_serializable(key): make_json_serializable(value) for key, value in obj.items()}
-    elif isinstance(obj, list):
-        return [make_json_serializable(element) for element in obj]
-    elif hasattr(obj, '__dict__'):
-        return make_json_serializable(obj.__dict__)
-    else:
-        return str(obj)
-
-
-def make_json_serializable_truncated(obj, max_length: int = 100000):
-    if isinstance(obj, (int, float, bool, type(None))):
-        if isinstance(obj, (int, float)) and len(str(obj)) > max_length:
-            return str(obj)[:max_length - 3] + "..."
-        return obj
-    elif isinstance(obj, str):
-        return obj if len(obj) <= max_length else obj[:max_length - 3] + "..."
-    elif isinstance(obj, dict):
-        return {make_json_serializable_truncated(key, max_length): make_json_serializable_truncated(value, max_length)
-                for key, value in obj.items()}
-    elif isinstance(obj, list):
-        return [make_json_serializable_truncated(element, max_length) for element in obj]
-    elif hasattr(obj, '__dict__'):
-        return make_json_serializable_truncated(obj.__dict__, max_length)
-    else:
-        result = str(obj)
-        return result if len(result) <= max_length else result[:max_length - 3] + "..."
-
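Note that this commit deletes opentools/models/utlis.py (filename with transposed letters) while app.py's new import expects opentools.models.utils, and no utils.py appears in the commit's file list; that mismatch may explain the Space's "Runtime error" status shown above. For reference, the deleted make_json_serializable recursively collapses objects to JSON-compatible values via __dict__, falling back to str. A quick illustration with hypothetical values, assuming the function defined in the deleted file above is importable:

# Behavior of the deleted helper (hypothetical example values).
class Step:
    def __init__(self):
        self.tool = "Generalist_Solution_Generator_Tool"
        self.score = 0.9

# Objects with a __dict__ collapse to plain dicts; anything else becomes str.
print(make_json_serializable({"step": Step(), "ok": True}))
# -> {'step': {'tool': 'Generalist_Solution_Generator_Tool', 'score': 0.9}, 'ok': True}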
opentools/setup.py DELETED

@@ -1,20 +0,0 @@
-from setuptools import setup, find_packages
-
-setup(
-    name='opentools',
-    version='0.1.0',
-    # description='A flexible and versatile toolbox agent framework for complex tasks in both general and scientific scenarios.',
-    # long_description=open('README.md').read(),
-    # long_description_content_type='text/markdown',
-    # author='Pan Lu, Bowen Chen, Sheng Liu',
-    # author_email='lupantech@gmail.com',
-    # url='', # You can add a GitHub or project URL here
-    packages=find_packages(),
-    # install_requires=open('requirements.txt').read().splitlines(),
-    # classifiers=[
-    #     'Programming Language :: Python :: 3',
-    #     'License :: OSI Approved :: MIT License',
-    #     'Operating System :: OS Independent',
-    # ],
-    # python_requires='>=3.10',
-)
opentools/tools/generalist_solution_generator/tool.py CHANGED

@@ -4,8 +4,9 @@ from opentools.engine.openai import ChatOpenAI
 
 class Generalist_Solution_Generator_Tool(BaseTool):
     require_llm_engine = True
+    require_api_key = True
 
-    def __init__(self, model_string="gpt-4o-mini"):
+    def __init__(self, model_string="gpt-4o-mini", api_key=None):
         super().__init__(
             tool_name="Generalist_Solution_Generator_Tool",
             tool_description="A generalized tool that takes query from the user as prompt, and answers the question step by step to the best of its ability. It can also accept an image.",

@@ -72,12 +73,13 @@ class Generalist_Solution_Generator_Tool(BaseTool):
         # }
         )
         self.model_string = model_string
+        self.api_key = api_key
 
     def execute(self, prompt, image=None):
 
         print(f"\nInitializing Generalist Tool with model: {self.model_string}")
         multimodal = True if image else False
-        llm_engine = ChatOpenAI(model_string=self.model_string, is_multimodal=multimodal)
+        llm_engine = ChatOpenAI(model_string=self.model_string, is_multimodal=multimodal, api_key=self.api_key)
 
         try:
             input_data = [prompt]
setup.py DELETED

@@ -1,20 +0,0 @@
-from setuptools import setup, find_packages
-
-setup(
-    name='opentools',
-    version='0.1.0',
-    # description='A flexible and versatile toolbox agent framework for complex tasks in both general and scientific scenarios.',
-    # long_description=open('README.md').read(),
-    # long_description_content_type='text/markdown',
-    # author='Pan Lu, Bowen Chen, Sheng Liu',
-    # author_email='lupantech@gmail.com',
-    # url='', # You can add a GitHub or project URL here
-    packages=find_packages(),
-    # install_requires=open('requirements.txt').read().splitlines(),
-    # classifiers=[
-    #     'Programming Language :: Python :: 3',
-    #     'License :: OSI Approved :: MIT License',
-    #     'Operating System :: OS Independent',
-    # ],
-    # python_requires='>=3.10',
-)