Spaces: Running on A10G
IsaacGHX committed
Commit · b2c5439 · 1 Parent(s): 23ae09e
update
app.py CHANGED
@@ -780,13 +780,25 @@ def main(args):
     run_button.click(
         fn=solve_problem_gradio,
         inputs=[user_query, max_steps, max_time, llm_model_engine, enabled_tools],
-        outputs=chatbot_output
+        outputs=chatbot_output,
+        concurrency_limit=10,  # A10 GPU can handle ~10 concurrent requests with vLLM
+        concurrency_id="agentflow_solver"  # Shared queue for managing GPU resources
     )
     #################### Gradio Interface ####################
 
-    #
+    # Configure queue for high traffic - optimized for A10 GPU (40G RAM, 24G VRAM)
+    demo.queue(
+        default_concurrency_limit=10,  # Balanced for A10 GPU + vLLM inference
+        max_size=50,  # Allow up to 50 queued requests during traffic spikes
+    )
+
+    # Launch the Gradio app with optimized threading
     # demo.launch(ssr_mode=False)
-    demo.launch(
+    demo.launch(
+        ssr_mode=False,
+        share=True,
+        max_threads=80  # Increase from the default 40 to support high concurrency
+    )
 
 if __name__ == "__main__":
     import atexit
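For reference, Gradio exposes these knobs at three levels: per-event (concurrency_limit / concurrency_id on the listener), app-wide queue (demo.queue(...)), and server worker threads (max_threads in launch()). Below is a minimal, self-contained sketch of the same configuration, assuming Gradio 5.x (where ssr_mode is a launch parameter); slow_task is a hypothetical placeholder for solve_problem_gradio.

# Minimal sketch of the concurrency setup from the commit above.
# Assumption: Gradio 5.x; slow_task stands in for solve_problem_gradio.
import time

import gradio as gr

def slow_task(prompt: str) -> str:
    time.sleep(2)  # stand-in for GPU-bound vLLM inference
    return f"answer for: {prompt}"

with gr.Blocks() as demo:
    query = gr.Textbox(label="Query")
    answer = gr.Textbox(label="Answer")
    run = gr.Button("Run")
    run.click(
        fn=slow_task,
        inputs=query,
        outputs=answer,
        concurrency_limit=10,               # at most 10 of these events run at once
        concurrency_id="agentflow_solver",  # events sharing this id share the limit
    )

# App-wide queue: default per-event concurrency (for events without an
# explicit limit) and how many requests may wait before new ones are rejected.
demo.queue(default_concurrency_limit=10, max_size=50)

if __name__ == "__main__":
    # max_threads raises the worker-thread ceiling above Gradio's default of 40;
    # share=True (as in the commit) would additionally open a public tunnel.
    demo.launch(ssr_mode=False, max_threads=80)

Note that the explicit concurrency_limit on the click handler takes precedence; default_concurrency_limit only applies to events that set no limit of their own, and max_size=50 bounds the waiting queue, with requests beyond that rejected rather than queued.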