Spaces:

merterbak
/

gpt-oss-20b-demo

Running on Zero

App Files Community

merterbak committed on Aug 5

Commit

6ce8b1e

verified ·

1 Parent(s): 2870fe9

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -8

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import torch
 from threading import Thread
 import gradio as gr
 import spaces
 model_id = "openai/gpt-oss-20b"
@@ -32,20 +33,34 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
     streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
     generation_kwargs = {
-        "streamer": streamer,
         "max_new_tokens": max_new_tokens,
         "do_sample": True,
         "temperature": temperature,
         "top_p": top_p,
         "top_k": top_k,
-        "repetition_penalty": repetition_penalty
     }
     thread = Thread(target=pipe, args=(messages,), kwargs=generation_kwargs)
     thread.start()
-    outputs = []
-    for text_chunk in streamer:
-        outputs.append(text_chunk)
-    yield "".join(outputs)
 demo = gr.ChatInterface(
     fn=generate_response,
@@ -63,7 +78,7 @@ demo = gr.ChatInterface(
         gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.0)
     ],
     examples=[
-        [{"text": "Explain Newton laws clearly and concisely."}],
         [{"text": "Write a Python function to calculate the Fibonacci sequence"}],
         [{"text": "What are the benefits of open weight AI models"}],
     ],
@@ -71,7 +86,7 @@ demo = gr.ChatInterface(
     type="messages",
     description="""
 # gpt-oss-20b
-You can adjust reasoning level in the system prompt like "Reasoning: high".
     """,
     fill_height=True,
     textbox=gr.Textbox(

 from threading import Thread
 import gradio as gr
 import spaces
+import re
 model_id = "openai/gpt-oss-20b"
     streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
     generation_kwargs = {
         "max_new_tokens": max_new_tokens,
         "do_sample": True,
         "temperature": temperature,
         "top_p": top_p,
         "top_k": top_k,
+        "repetition_penalty": repetition_penalty,
+        "streamer": streamer
     }
     thread = Thread(target=pipe, args=(messages,), kwargs=generation_kwargs)
     thread.start()
+    #streaming try #1
+    buffer = ""
+    full_response = ""
+    for chunk in streamer:
+        buffer += chunk
+        parts = re.split(r'(\s+)', buffer)
+        if re.match(r'\s+', parts[-1]) is not None:
+            to_append = ''.join(parts)
+            buffer = ""
+        else:
+            to_append = ''.join(parts[:-1])
+            buffer = parts[-1]
+        if to_append:
+            full_response += to_append
+            yield full_response
+    if buffer:
+        full_response += buffer
+        yield full_response
 demo = gr.ChatInterface(
     fn=generate_response,
         gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.0)
     ],
     examples=[
+        [{"text": "Explain Newton laws clearly and concisely"}],
         [{"text": "Write a Python function to calculate the Fibonacci sequence"}],
         [{"text": "What are the benefits of open weight AI models"}],
     ],
     type="messages",
     description="""
 # gpt-oss-20b
+Wait couple of seconds initially. You can adjust reasoning level in the system prompt like "Reasoning: high.
     """,
     fill_height=True,
     textbox=gr.Textbox(