nazdridoy committed
Commit 074f3bf · verified · 1 Parent(s): 9a50492

feat(ui): add toggle to show/hide AI reasoning

Browse files

- [add] Import `re` module (utils.py:4)
- [feat] Add `render_with_reasoning_toggle` function to process `<think>` tags (utils.py:293-324); behavior is sketched after this list
- [add] Import `render_with_reasoning_toggle` (chat_handler.py:17)
- [feat] Modify `handle_chat_submit` to accept `show_reasoning` and use toggle function (chat_handler.py:handle_chat_submit(),202)
- [feat] Modify `handle_chat_retry` to accept `show_reasoning` and use toggle function (chat_handler.py:handle_chat_retry(),279)
- [ui] Add `gr.Checkbox` for "Show reasoning" in `create_chat_tab` (ui_components.py:52)
- [ui] Include `show_reasoning` in inputs for chat submit and retry events (ui_components.py:82,97,119)
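
As a quick orientation, here is a minimal sketch of the toggle's intended behavior, assuming `render_with_reasoning_toggle` is importable from `utils` as added in this commit (the example strings are hypothetical):

```python
from utils import render_with_reasoning_toggle

text = "Sure. <think>The user wants brevity.</think>The answer is 42."

# Toggle off: the <think> block is stripped from the rendered reply.
print(render_with_reasoning_toggle(text, False))
# -> Sure. The answer is 42.

# Toggle on: the reasoning is kept, wrapped in a collapsible <details> block.
print(render_with_reasoning_toggle(text, True))
```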

Files changed (3)
  1. chat_handler.py +6 -5
  2. ui_components.py +8 -3
  3. utils.py +40 -0
chat_handler.py CHANGED
@@ -17,7 +17,8 @@ from utils import (
     parse_model_and_provider,
     format_error_message,
     check_org_access,
-    format_access_denied_message
+    format_access_denied_message,
+    render_with_reasoning_toggle
 )
 
 # Timeout configuration for inference requests
@@ -167,7 +168,7 @@ def chat_respond(
         yield format_error_message("Unexpected Error", f"An unexpected error occurred: {error_msg}")
 
 
-def handle_chat_submit(message, history, system_msg, model_name, max_tokens, temperature, top_p, hf_token: gr.OAuthToken = None):
+def handle_chat_submit(message, history, system_msg, model_name, max_tokens, temperature, top_p, show_reasoning=False, hf_token: gr.OAuthToken = None):
     """
     Handle chat submission and manage conversation history with streaming.
     """
@@ -202,13 +203,13 @@ def handle_chat_submit(message, history, system_msg, model_name, max_tokens, tem
     # Stream the assistant response token by token
     assistant_response = ""
     for partial_response in response_generator:
-        assistant_response = partial_response
+        assistant_response = render_with_reasoning_toggle(partial_response, bool(show_reasoning))
         # Update history with the current partial response and yield it
         current_history = history + [{"role": "assistant", "content": assistant_response}]
         yield current_history, ""
 
 
-def handle_chat_retry(history, system_msg, model_name, max_tokens, temperature, top_p, hf_token: gr.OAuthToken = None, retry_data=None):
+def handle_chat_retry(history, system_msg, model_name, max_tokens, temperature, top_p, show_reasoning=False, hf_token: gr.OAuthToken = None, retry_data=None):
     """
     Retry the assistant response for the selected message.
     Works with gr.Chatbot.retry() which provides retry_data.index for the message.
@@ -274,6 +275,6 @@ def handle_chat_retry(history, system_msg, model_name, max_tokens, temperature,
 
     assistant_response = ""
     for partial_response in response_generator:
-        assistant_response = partial_response
+        assistant_response = render_with_reasoning_toggle(partial_response, bool(show_reasoning))
         current_history = trimmed_history + [{"role": "assistant", "content": assistant_response}]
         yield current_history
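
One property of this wiring worth noting: the toggle function is applied to every cumulative partial response, so partial reasoning never flashes on screen mid-stream. A small illustration with hypothetical cumulative chunks, assuming `utils` is on the import path:

```python
from utils import render_with_reasoning_toggle

# Hypothetical cumulative partials, as a streaming backend might yield them.
partials = [
    "<think>The user asked",               # opening tag only -> fully hidden
    "<think>The user asked X.</think>",    # block complete -> stripped
    "<think>The user asked X.</think>It is X.",
]
for p in partials:
    print(repr(render_with_reasoning_toggle(p, show_reasoning=False)))
# -> ''  then  ''  then  'It is X.'
```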
ui_components.py CHANGED
@@ -52,6 +52,11 @@ def create_chat_tab(handle_chat_submit_fn, handle_chat_retry_fn=None):
                 lines=2,
                 placeholder="Define the assistant's personality and behavior..."
             )
+            show_reasoning = gr.Checkbox(
+                value=False,
+                label="Show reasoning (<think>…</think>)",
+                info="Reveal model's reasoning, if present",
+            )
 
         with gr.Column(scale=1):
             chat_max_tokens = gr.Slider(
@@ -82,7 +87,7 @@ def create_chat_tab(handle_chat_submit_fn, handle_chat_retry_fn=None):
         chat_send_event = chat_submit.click(
             fn=handle_chat_submit_fn,
             inputs=[chat_input, chatbot_display, chat_system_message, chat_model_name,
-                    chat_max_tokens, chat_temperature, chat_top_p],
+                    chat_max_tokens, chat_temperature, chat_top_p, show_reasoning],
             outputs=[chatbot_display, chat_input]
         )
 
@@ -97,7 +102,7 @@ def create_chat_tab(handle_chat_submit_fn, handle_chat_retry_fn=None):
         chat_enter_event = chat_input.submit(
             fn=handle_chat_submit_fn,
             inputs=[chat_input, chatbot_display, chat_system_message, chat_model_name,
-                    chat_max_tokens, chat_temperature, chat_top_p],
+                    chat_max_tokens, chat_temperature, chat_top_p, show_reasoning],
             outputs=[chatbot_display, chat_input]
         )
 
@@ -119,7 +124,7 @@ def create_chat_tab(handle_chat_submit_fn, handle_chat_retry_fn=None):
         chatbot_display.retry(
             fn=handle_chat_retry_fn,
             inputs=[chatbot_display, chat_system_message, chat_model_name,
-                    chat_max_tokens, chat_temperature, chat_top_p],
+                    chat_max_tokens, chat_temperature, chat_top_p, show_reasoning],
             outputs=chatbot_display
         )
 
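A wiring detail that is easy to miss: Gradio maps the `inputs` list onto the handler's parameters positionally, and `hf_token` is deliberately absent from the list because Gradio injects parameters annotated with `gr.OAuthToken` from the login session. That is why `show_reasoning` is appended as the last listed input and sits before `hf_token` in the handler signatures. A schematic restatement (not the app's actual code):

```python
import gradio as gr

# inputs=[chat_input, chatbot_display, chat_system_message, chat_model_name,
#         chat_max_tokens, chat_temperature, chat_top_p, show_reasoning]
# map positionally onto the first eight parameters below:
def handle_chat_submit(message, history, system_msg, model_name,
                       max_tokens, temperature, top_p, show_reasoning=False,
                       hf_token: gr.OAuthToken = None):
    # hf_token is injected by Gradio via its gr.OAuthToken annotation,
    # so it must come after every parameter fed from the inputs list.
    ...
```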
utils.py CHANGED
@@ -4,6 +4,7 @@ Contains configuration constants and helper functions.
 """
 
 import os
+import re
 import requests
 
 
@@ -290,3 +291,42 @@ def check_org_access(access_token: str) -> tuple[bool, str, str | None, list[str
 def format_access_denied_message(message: str) -> str:
     """Return a standardized access denied message for UI display."""
     return format_error_message("Access Denied", message)
+
+
+# -----------------------------
+# Reasoning (<think>) utilities
+# -----------------------------
+
+def render_with_reasoning_toggle(text: str, show_reasoning: bool) -> str:
+    """Render assistant text while optionally revealing content inside <think>...</think>.
+
+    When show_reasoning is True, wrap the reasoning content in a collapsible HTML details block
+    with a fenced code block for readability. When False, strip the reasoning content entirely.
+
+    This function is designed to be called repeatedly during streaming; it hides any partial
+    reasoning until both the opening and closing tags have arrived in the text.
+    """
+    if not isinstance(text, str) or "<think>" not in text:
+        return text
+
+    pattern = re.compile(r"<think>([\s\S]*?)</think>", re.IGNORECASE)
+
+    # If the closing tag hasn't arrived yet (streaming), hide the partial reasoning
+    if "</think>" not in text:
+        # Trim everything from the first <think> onwards
+        head = text.split("<think>", 1)[0]
+        return head
+
+    def _replace(match: re.Match) -> str:
+        content = match.group(1).strip()
+        if not show_reasoning:
+            return ""
+        # Use HTML <details>, which is generally supported by Markdown renderers,
+        # and keep the reasoning in a code fence for safe rendering.
+        return (
+            "<details><summary>Reasoning</summary>\n\n"
+            "```text\n" + content + "\n```\n"
+            "</details>\n"
+        )
+
+    return pattern.sub(_replace, text)
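
For reviewers, a quick check of the regex's edge cases: `[\s\S]*?` matches across newlines without needing `re.DOTALL`, the non-greedy quantifier stops at the first closing tag so multiple blocks are handled independently, and `re.IGNORECASE` tolerates variant casings (hypothetical input):

```python
import re

pattern = re.compile(r"<think>([\s\S]*?)</think>", re.IGNORECASE)

text = "<THINK>first\nblock</THINK> kept <think>second</think> also kept"
print(pattern.sub("", text))
# -> ' kept  also kept'
```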