Spaces:

ThongCoder
/

Qwen3-0.6B-Coder

Paused

ThongCoder committed on Sep 7

Commit

c19dc01

verified ·

1 Parent(s): 5be1376

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,42 +1,47 @@
 from transformers import pipeline
 import gradio as gr
-# Load your model with pipeline
 pipe = pipeline("text-generation", model="prithivMLmods/rStar-Coder-Qwen3-0.6B")
-# Keep a conversation history
 # Module-level list of "User: ..." / "Bot: ..." lines; chat_fn appends to it on every call.
 history = []
def chat_fn(user_input):
    """Produce the bot's reply to ``user_input`` and log the exchange.

    The user's message is appended to the module-level ``history`` list, the
    whole transcript is joined into one prompt ending in ``"Bot:"``, and the
    pipeline is asked for up to 200 new tokens with nucleus sampling
    (``top_p=0.9``).

    Args:
        user_input: The text the user just submitted.

    Returns:
        The text following the last ``"Bot:"`` marker in the generated
        output, with surrounding whitespace stripped.
    """
    global history

    history.append(f"User: {user_input}")
    prompt = "\n".join(history) + "\nBot:"

    results = pipe(prompt, max_new_tokens=200, do_sample=True, top_p=0.9)
    generated = results[0]['generated_text']

    # The generated text still carries the prompt, so keep only what comes
    # after the final "Bot:" marker (the whole text if the marker is absent).
    bot_reply = generated.rpartition("Bot:")[2].strip()

    history.append(f"Bot: {bot_reply}")
    return bot_reply
 # Gradio interface: a chatbot pane fed by a single textbox.
 with gr.Blocks() as demo:
     chatbot_ui = gr.Chatbot()
     msg = gr.Textbox(placeholder="Type a message...")

     def respond(user_input, chat_history):
         """Answer one submitted message and return the updated transcript.

         The transcript list is returned twice: once for the chatbot
         display and once for the state component.
         """
         exchange = (user_input, chat_fn(user_input))
         chat_history.append(exchange)
         return chat_history, chat_history

     # List of (user, bot) pairs held by Gradio between submissions.
     state = gr.State([])
     msg.submit(fn=respond, inputs=[msg, state], outputs=[chatbot_ui, state])
 demo.launch()

 from transformers import pipeline
 import gradio as gr
+import time
+# Load model
 # Text-generation pipeline, constructed once when the module is imported.
 pipe = pipeline("text-generation", model="prithivMLmods/rStar-Coder-Qwen3-0.6B")
 # Module-level list of "User: ..." / "Bot: ..." lines; chat_fn_stream appends to it on every call.
 history = []
def chat_fn_stream(user_input, max_new_tokens=8192):
    """Stream the bot's reply to ``user_input`` as it is generated.

    The user's message is appended to the module-level ``history`` list and
    the whole transcript, ending in ``"Bot:"``, is used as the prompt. The
    model is run exactly once; tokens are forwarded as they are produced
    through a ``TextIteratorStreamer``. This replaces re-running the whole
    sampled generation for every 20-token step, where each "partial" was an
    unrelated sample and the work grew with every step.

    Args:
        user_input: The text the user just submitted.
        max_new_tokens: Upper bound on the number of generated tokens.

    Yields:
        The reply accumulated so far (whitespace-stripped); each yielded
        value extends the previous one.
    """
    # Imported here so the block does not depend on edits to the file's
    # import section.
    from threading import Thread
    from transformers import TextIteratorStreamer

    history.append(f"User: {user_input}")
    context = "\n".join(history) + "\nBot:"

    # skip_prompt drops the echoed context, so only new text is streamed.
    streamer = TextIteratorStreamer(
        pipe.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # Generation blocks until finished, so it runs in a worker thread while
    # this generator drains the streamer.
    worker = Thread(
        target=pipe,
        args=(context,),
        kwargs={
            "max_new_tokens": max_new_tokens,
            "do_sample": True,
            "top_p": 0.9,
            "return_full_text": False,
            "streamer": streamer,
        },
        daemon=True,
    )
    worker.start()

    bot_reply = ""
    try:
        for piece in streamer:
            if not piece:
                continue
            bot_reply += piece
            yield bot_reply.strip()
    finally:
        # Record whatever was produced, even if the consumer stopped early,
        # so the transcript never ends on an unanswered "User:" line.
        history.append(f"Bot: {bot_reply.strip()}")
 # Gradio interface: a chatbot pane fed by a single textbox.
 with gr.Blocks() as demo:
     chatbot_ui = gr.Chatbot()
     msg = gr.Textbox(placeholder="Type a message...")

     def respond(user_input, chat_history):
         """Stream the reply to one submitted message into the transcript.

         Yields the transcript twice per update: once for the chatbot
         display and once for the state component.
         """
         # Open a new (user, bot) slot for this turn. Assigning to
         # chat_history[-1] without it raises IndexError on the first
         # message and overwrites the previous exchange on later ones.
         chat_history.append((user_input, ""))
         yield chat_history, chat_history

         for partial in chat_fn_stream(user_input):
             # Replace this turn's slot with the latest partial reply.
             chat_history[-1] = (user_input, partial)
             yield chat_history, chat_history

     # List of (user, bot) pairs held by Gradio between submissions.
     state = gr.State([])
     msg.submit(respond, [msg, state], [chatbot_ui, state])
 demo.launch()