IsaacGHX committed on
Commit
b2c5439
·
1 Parent(s): 23ae09e
Files changed (1) hide show
  1. app.py +15 -3
app.py CHANGED
@@ -780,13 +780,25 @@ def main(args):
780
  run_button.click(
781
  fn=solve_problem_gradio,
782
  inputs=[user_query, max_steps, max_time, llm_model_engine, enabled_tools],
783
- outputs=chatbot_output
 
 
784
  )
785
  #################### Gradio Interface ####################
786
 
787
- # Launch the Gradio app
 
 
 
 
 
 
788
  # demo.launch(ssr_mode=False)
789
- demo.launch(ssr_mode=False, share=True) # Added share=True parameter
 
 
 
 
790
 
791
  if __name__ == "__main__":
792
  import atexit
 
780
  run_button.click(
781
  fn=solve_problem_gradio,
782
  inputs=[user_query, max_steps, max_time, llm_model_engine, enabled_tools],
783
+ outputs=chatbot_output,
784
+ concurrency_limit=10, # A10 GPU can handle ~10 concurrent requests with vLLM
785
+ concurrency_id="agentflow_solver" # Shared queue for managing GPU resource
786
  )
787
  #################### Gradio Interface ####################
788
 
789
+ # Configure queue for high traffic - optimized for A10 GPU (40G RAM, 24G VRAM)
790
+ demo.queue(
791
+ default_concurrency_limit=10, # Balanced for A10 GPU + vLLM inference
792
+ max_size=50, # Allow up to 50 requests in queue for traffic spikes
793
+ )
794
+
795
+ # Launch the Gradio app with optimized threading
796
  # demo.launch(ssr_mode=False)
797
+ demo.launch(
798
+ ssr_mode=False,
799
+ share=True,
800
+ max_threads=80 # Increase from default 40 to support high concurrency
801
+ )
802
 
803
  if __name__ == "__main__":
804
  import atexit