jedick committed · Commit e4c1af6 · 1 Parent(s): b42e964
Disable thinking by default
app.py
CHANGED
@@ -96,7 +96,6 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
     if compute_mode == "local":
         gr.Info(
             f"Please wait for the local model to load",
-            duration=8,
             title=f"Model loading...",
         )
     # Get the chat model and build the graph
@@ -105,7 +104,6 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
         chat_model,
         compute_mode,
         search_type,
-        think_answer=True,
         embedding_ckpt_dir=embedding_ckpt_dir,
     )
     # Compile the graph with an in-memory checkpointer
@@ -225,24 +223,35 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
 
 def to_workflow(request: gr.Request, *args):
     """Wrapper function to call function with or without @spaces.GPU"""
+    input = args[0]
     compute_mode = args[2]
     # Add session_hash to arguments
     new_args = args + (request.session_hash,)
     if compute_mode == "local":
         # Call the workflow function with the @spaces.GPU decorator
-
-
+        if "/think" in input:
+            for value in run_workflow_local_long(*new_args):
+                yield value
+        else:
+            for value in run_workflow_local(*new_args):
+                yield value
     if compute_mode == "remote":
         for value in run_workflow_remote(*new_args):
             yield value
 
 
-@spaces.GPU(duration=
+@spaces.GPU(duration=60)
 def run_workflow_local(*args):
     for value in run_workflow(*args):
         yield value
 
 
+@spaces.GPU(duration=100)
+def run_workflow_local_long(*args):
+    for value in run_workflow(*args):
+        yield value
+
+
 def run_workflow_remote(*args):
     for value in run_workflow(*args):
         yield value
@@ -401,9 +410,8 @@ with gr.Blocks(
            status_text = f"""
            Now in **local** mode, using ZeroGPU hardware<br>
            Response time is about one minute<br>
-           🧠
-
-             🚫 Add **/no_think** to disable all thinking</br>
+           🧠 Add **/think** to enable thinking</br>
+             Increases ZeroGPU allotment to 100 seconds</br>
            ✨ [{embedding_model_id.split("/")[-1]}](https://huggingface.co/{embedding_model_id}) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
            See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
            """
@@ -432,7 +440,7 @@ with gr.Blocks(
        questions = [
            # "What is today's date?",
            "Summarize emails from the last two months",
-           "Show me code examples using plotmath
+           "Show me code examples using plotmath",
            "When was has.HLC mentioned?",
            "Who reported installation problems in 2023-2024?",
        ]
@@ -456,6 +464,18 @@ with gr.Blocks(
 
        return gr.Dataset(samples=[[q] for q in questions]) if as_dataset else questions
 
+   def get_multi_turn_questions(compute_mode, as_dataset=True):
+       """Get multi-turn example questions based on compute mode"""
+       questions = [
+           "Lookup emails that reference bugs.r-project.org in 2025",
+           "Did those authors report bugs before 2025? /think",
+       ]
+
+       if compute_mode == "remote":
+           questions = [q.replace(" /think", "") for q in questions]
+
+       return gr.Dataset(samples=[[q] for q in questions]) if as_dataset else questions
+
    with gr.Row():
        # Left column: Intro, Compute, Chat
        with gr.Column(scale=2):
@@ -494,10 +514,9 @@ with gr.Blocks(
                    label="Multiple retrievals",
                )
                multi_turn_questions = gr.Examples(
-                   examples=
-
-
-                   ],
+                   examples=get_multi_turn_questions(
+                       compute_mode.value, as_dataset=False
+                   ),
                    inputs=[input],
                    label="Asking follow-up questions",
                )
@@ -585,18 +604,18 @@ with gr.Blocks(
        [compute_mode],
        [status],
        api_name=False,
-   ).then(
-       # Update examples based on compute mode
-       get_example_questions,
-       [compute_mode],
-       [example_questions.dataset],
-       api_name=False,
    ).then(
        # Update multi-tool examples based on compute mode
        get_multi_tool_questions,
        [compute_mode],
        [multi_tool_questions.dataset],
        api_name=False,
+   ).then(
+       # Update multi-turn examples based on compute mode
+       get_multi_turn_questions,
+       [compute_mode],
+       [multi_turn_questions.dataset],
+       api_name=False,
    )
 
    input.submit(
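
For reference, the dispatch this diff adds to to_workflow can be sketched in isolation as below. This is a minimal sketch, not the app's code: the stub run_workflow generator and the no-op fallback for spaces.GPU are assumptions so the snippet runs outside a ZeroGPU Space, and the real to_workflow also receives the gr.Request and the full Gradio argument list.

# Minimal sketch of the "/think" dispatch added in to_workflow() above.
# Assumptions: a stub run_workflow generator stands in for the real
# streaming workflow, and spaces.GPU falls back to a no-op decorator
# so the sketch also runs outside a ZeroGPU Space.
try:
    import spaces  # provides the @spaces.GPU decorator on Hugging Face Spaces
except ImportError:
    class spaces:  # fallback stub (assumption, not part of app.py)
        @staticmethod
        def GPU(duration=60):
            def wrap(fn):
                return fn
            return wrap


def run_workflow(*args):
    # Stand-in for the real streaming workflow (illustrative only)
    yield f"processed {args[0]!r}"


@spaces.GPU(duration=60)  # default ZeroGPU allotment, thinking disabled
def run_workflow_local(*args):
    for value in run_workflow(*args):
        yield value


@spaces.GPU(duration=100)  # longer allotment when the user opts in with /think
def run_workflow_local_long(*args):
    for value in run_workflow(*args):
        yield value


def to_workflow(user_input, compute_mode="local"):
    # Route to the longer GPU budget only when the prompt contains /think
    if compute_mode == "local":
        runner = run_workflow_local_long if "/think" in user_input else run_workflow_local
        for value in runner(user_input):
            yield value


print(list(to_workflow("Summarize emails from the last two months")))
print(list(to_workflow("Did those authors report bugs before 2025? /think")))

In remote mode the app bypasses the GPU-decorated wrappers entirely, and get_multi_turn_questions strips the trailing " /think" from the example questions, as shown in the diff above.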