feat(ui): add toggle to show/hide AI reasoning
- [add] Import `re` module (utils.py:4)
- [feat] Add `render_with_reasoning_toggle` function to process `<think>` tags (utils.py:293-324)
- [add] Import `render_with_reasoning_toggle` (chat_handler.py:17)
- [feat] Modify `handle_chat_submit` to accept `show_reasoning` and use toggle function (chat_handler.py:handle_chat_submit(),202)
- [feat] Modify `handle_chat_retry` to accept `show_reasoning` and use toggle function (chat_handler.py:handle_chat_retry(),279)
- [ui] Add `gr.Checkbox` for "Show reasoning" in `create_chat_tab` (ui_components.py:52)
- [ui] Include `show_reasoning` in inputs for chat submit and retry events (ui_components.py:82,97,119)
- chat_handler.py +6 -5
- ui_components.py +8 -3
- utils.py +40 -0
chat_handler.py CHANGED

```diff
@@ -17,7 +17,8 @@ from utils import (
     parse_model_and_provider,
     format_error_message,
     check_org_access,
-    format_access_denied_message
+    format_access_denied_message,
+    render_with_reasoning_toggle
 )
 
 # Timeout configuration for inference requests
@@ -167,7 +168,7 @@ def chat_respond(
         yield format_error_message("Unexpected Error", f"An unexpected error occurred: {error_msg}")
 
 
-def handle_chat_submit(message, history, system_msg, model_name, max_tokens, temperature, top_p, hf_token: gr.OAuthToken = None):
+def handle_chat_submit(message, history, system_msg, model_name, max_tokens, temperature, top_p, show_reasoning=False, hf_token: gr.OAuthToken = None):
     """
     Handle chat submission and manage conversation history with streaming.
     """
@@ -202,13 +203,13 @@ def handle_chat_submit(message, history, system_msg, model_name, max_tokens, tem
     # Stream the assistant response token by token
     assistant_response = ""
     for partial_response in response_generator:
-        assistant_response = partial_response
+        assistant_response = render_with_reasoning_toggle(partial_response, bool(show_reasoning))
         # Update history with the current partial response and yield it
         current_history = history + [{"role": "assistant", "content": assistant_response}]
         yield current_history, ""
 
 
-def handle_chat_retry(history, system_msg, model_name, max_tokens, temperature, top_p, hf_token: gr.OAuthToken = None, retry_data=None):
+def handle_chat_retry(history, system_msg, model_name, max_tokens, temperature, top_p, show_reasoning=False, hf_token: gr.OAuthToken = None, retry_data=None):
     """
     Retry the assistant response for the selected message.
     Works with gr.Chatbot.retry() which provides retry_data.index for the message.
@@ -274,6 +275,6 @@ def handle_chat_retry(history, system_msg, model_name, max_tokens, temperature,
 
     assistant_response = ""
     for partial_response in response_generator:
-        assistant_response = partial_response
+        assistant_response = render_with_reasoning_toggle(partial_response, bool(show_reasoning))
         current_history = trimmed_history + [{"role": "assistant", "content": assistant_response}]
         yield current_history
```
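For context, here is a minimal sketch of the streaming pattern above: each yielded partial is the accumulated response so far, and every partial is re-rendered before display, so reasoning text never flashes on screen mid-stream. The `render` stub below is a simplified stand-in for `utils.render_with_reasoning_toggle`, not the commit's code.

```python
# Simplified stand-in for utils.render_with_reasoning_toggle (an assumption
# for illustration, not the commit's implementation).
def render(text: str, show: bool) -> str:
    if "<think>" not in text:
        return text
    if "</think>" not in text:
        # Closing tag not streamed yet: hide the partial reasoning.
        return text.split("<think>", 1)[0]
    head, rest = text.split("<think>", 1)
    thought, tail = rest.split("</think>", 1)
    return head + (f"[reasoning: {thought.strip()}] " if show else "") + tail

# Each partial is the accumulated response so far, as in the handlers above.
streamed = ""
for chunk in ["<think>add the", " numbers</think>", "2 + 2 = 4"]:
    streamed += chunk
    print(repr(render(streamed, show=False)))
# Prints '', '', '2 + 2 = 4': reasoning never appears with the toggle off.
```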
ui_components.py CHANGED

```diff
@@ -52,6 +52,11 @@ def create_chat_tab(handle_chat_submit_fn, handle_chat_retry_fn=None):
                 lines=2,
                 placeholder="Define the assistant's personality and behavior..."
             )
+            show_reasoning = gr.Checkbox(
+                value=False,
+                label="Show reasoning (<think>…</think>)",
+                info="Reveal model's reasoning, if present",
+            )
 
         with gr.Column(scale=1):
             chat_max_tokens = gr.Slider(
@@ -82,7 +87,7 @@ def create_chat_tab(handle_chat_submit_fn, handle_chat_retry_fn=None):
     chat_send_event = chat_submit.click(
         fn=handle_chat_submit_fn,
         inputs=[chat_input, chatbot_display, chat_system_message, chat_model_name,
-                chat_max_tokens, chat_temperature, chat_top_p],
+                chat_max_tokens, chat_temperature, chat_top_p, show_reasoning],
         outputs=[chatbot_display, chat_input]
     )
 
@@ -97,7 +102,7 @@ def create_chat_tab(handle_chat_submit_fn, handle_chat_retry_fn=None):
     chat_enter_event = chat_input.submit(
         fn=handle_chat_submit_fn,
         inputs=[chat_input, chatbot_display, chat_system_message, chat_model_name,
-                chat_max_tokens, chat_temperature, chat_top_p],
+                chat_max_tokens, chat_temperature, chat_top_p, show_reasoning],
         outputs=[chatbot_display, chat_input]
     )
 
@@ -119,7 +124,7 @@ def create_chat_tab(handle_chat_submit_fn, handle_chat_retry_fn=None):
     chatbot_display.retry(
         fn=handle_chat_retry_fn,
         inputs=[chatbot_display, chat_system_message, chat_model_name,
-                chat_max_tokens, chat_temperature, chat_top_p],
+                chat_max_tokens, chat_temperature, chat_top_p, show_reasoning],
         outputs=chatbot_display
     )
 
```
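A minimal, self-contained sketch of the wiring above, with hypothetical component names rather than this app's: appending a `gr.Checkbox` to an event's `inputs` list makes Gradio pass its boolean state as one more argument to the handler.

```python
import gradio as gr

def echo(message, show_reasoning=False):
    # Gradio delivers the checkbox state as a plain bool.
    return f"show_reasoning={show_reasoning}: {message}"

with gr.Blocks() as demo:
    msg = gr.Textbox(label="Message")
    toggle = gr.Checkbox(value=False, label="Show reasoning")
    out = gr.Textbox(label="Output")
    msg.submit(fn=echo, inputs=[msg, toggle], outputs=out)

if __name__ == "__main__":
    demo.launch()
```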
utils.py CHANGED

```diff
@@ -4,6 +4,7 @@ Contains configuration constants and helper functions.
 """
 
 import os
+import re
 import requests
 
 
@@ -290,3 +291,42 @@ def check_org_access(access_token: str) -> tuple[bool, str, str | None, list[str
 def format_access_denied_message(message: str) -> str:
     """Return a standardized access denied message for UI display."""
     return format_error_message("Access Denied", message)
+
+
+# -----------------------------
+# Reasoning (<think>) utilities
+# -----------------------------
+
+def render_with_reasoning_toggle(text: str, show_reasoning: bool) -> str:
+    """Render assistant text while optionally revealing content inside <think>...</think>.
+
+    When show_reasoning is True, wrap the reasoning content in a collapsible HTML details block
+    with a fenced code block for readability. When False, strip the reasoning content entirely.
+
+    This function is designed to be called repeatedly during streaming; until both the opening
+    and closing tags have arrived, everything from the first <think> onwards is hidden.
+    """
+    if not isinstance(text, str) or "<think>" not in text:
+        return text
+
+    pattern = re.compile(r"<think>([\s\S]*?)</think>", re.IGNORECASE)
+
+    # If the closing tag hasn't arrived yet (streaming), hide the partial reasoning
+    if "</think>" not in text:
+        # Trim everything from the first <think> onwards
+        head = text.split("<think>", 1)[0]
+        return head
+
+    def _replace(match: re.Match) -> str:
+        content = match.group(1).strip()
+        if not show_reasoning:
+            return ""
+        # Use HTML <details>, which is generally supported by Markdown renderers,
+        # and keep the reasoning in a code fence for safe rendering.
+        return (
+            "<details><summary>Reasoning</summary>\n\n"
+            "```text\n" + content + "\n```\n"
+            "</details>\n"
+        )
+
+    return pattern.sub(_replace, text)
```