Spaces: Running on A10G
IsaacGHX committed
Commit · b2c5439 · 1 Parent(s): 23ae09e
update
app.py CHANGED
@@ -780,13 +780,25 @@ def main(args):
     run_button.click(
         fn=solve_problem_gradio,
         inputs=[user_query, max_steps, max_time, llm_model_engine, enabled_tools],
-        outputs=chatbot_output
+        outputs=chatbot_output,
+        concurrency_limit=10,  # A10 GPU can handle ~10 concurrent requests with vLLM
+        concurrency_id="agentflow_solver"  # Shared queue for managing GPU resources
     )
     #################### Gradio Interface ####################
 
-    #
+    # Configure queue for high traffic - optimized for A10 GPU (40G RAM, 24G VRAM)
+    demo.queue(
+        default_concurrency_limit=10,  # Balanced for A10 GPU + vLLM inference
+        max_size=50,  # Allow up to 50 queued requests during traffic spikes
+    )
+
+    # Launch the Gradio app with optimized threading
     # demo.launch(ssr_mode=False)
-    demo.launch(
+    demo.launch(
+        ssr_mode=False,
+        share=True,
+        max_threads=80  # Increase from the default 40 to support high concurrency
+    )
 
 if __name__ == "__main__":
     import atexit
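For reference, Gradio exposes these knobs at three levels: per-event (concurrency_limit / concurrency_id on the listener), app-wide queue (demo.queue(...)), and server worker threads (max_threads in launch()). Below is a minimal, self-contained sketch of the same configuration, assuming Gradio 5.x (where ssr_mode is a launch parameter); slow_task is a hypothetical placeholder for solve_problem_gradio.

# Minimal sketch of the concurrency setup from the commit above.
# Assumption: Gradio 5.x; slow_task stands in for solve_problem_gradio.
import time

import gradio as gr

def slow_task(prompt: str) -> str:
    time.sleep(2)  # stand-in for GPU-bound vLLM inference
    return f"answer for: {prompt}"

with gr.Blocks() as demo:
    query = gr.Textbox(label="Query")
    answer = gr.Textbox(label="Answer")
    run = gr.Button("Run")
    run.click(
        fn=slow_task,
        inputs=query,
        outputs=answer,
        concurrency_limit=10,               # at most 10 of these events run at once
        concurrency_id="agentflow_solver",  # events sharing this id share the limit
    )

# App-wide queue: default per-event concurrency (for events without an
# explicit limit) and how many requests may wait before new ones are rejected.
demo.queue(default_concurrency_limit=10, max_size=50)

if __name__ == "__main__":
    # max_threads raises the worker-thread ceiling above Gradio's default of 40;
    # share=True (as in the commit) would additionally open a public tunnel.
    demo.launch(ssr_mode=False, max_threads=80)

Note that the explicit concurrency_limit on the click handler takes precedence; default_concurrency_limit only applies to events that set no limit of their own, and max_size=50 bounds the waiting queue, with requests beyond that rejected rather than queued.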