Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,42 +1,47 @@
|
|
| 1 |
from transformers import pipeline
|
| 2 |
import gradio as gr
|
|
|
|
| 3 |
|
| 4 |
-
# Load
|
| 5 |
pipe = pipeline("text-generation", model="prithivMLmods/rStar-Coder-Qwen3-0.6B")
|
| 6 |
|
| 7 |
-
# Keep a conversation history
|
| 8 |
history = []
|
| 9 |
|
| 10 |
-
def
|
| 11 |
global history
|
| 12 |
-
# Add user message
|
| 13 |
history.append(f"User: {user_input}")
|
| 14 |
-
|
| 15 |
-
# Combine all previous messages as context
|
| 16 |
context = "\n".join(history) + "\nBot:"
|
| 17 |
-
|
| 18 |
-
#
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
history.append(f"Bot: {bot_reply}")
|
| 26 |
-
|
| 27 |
-
return bot_reply
|
| 28 |
|
| 29 |
# Gradio interface
|
| 30 |
with gr.Blocks() as demo:
|
| 31 |
chatbot_ui = gr.Chatbot()
|
| 32 |
msg = gr.Textbox(placeholder="Type a message...")
|
| 33 |
-
|
| 34 |
def respond(user_input, chat_history):
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
msg.submit(respond, [msg, state], [chatbot_ui, state])
|
| 41 |
|
| 42 |
demo.launch()
|
|
|
|
from transformers import pipeline
import gradio as gr
import time

# Load the text-generation model once at import time so every request
# shares the same pipeline instance.
pipe = pipeline("text-generation", model="prithivMLmods/rStar-Coder-Qwen3-0.6B")

# Conversation history shared across calls.
# NOTE(review): module-level state means all users of this Space share one
# conversation — fine for a single-user demo, not for concurrent users.
history = []
| 10 |
def chat_fn_stream(user_input):
    """Generate a reply to *user_input* and yield it incrementally.

    Appends the user message and the final bot reply to the module-level
    ``history`` list so context accumulates across calls.

    Yields:
        str: progressively longer prefixes of the bot's reply, ending with
        the complete reply.
    """
    global history
    history.append(f"User: {user_input}")

    # Prompt = the whole conversation so far, with a "Bot:" cue for the model.
    context = "\n".join(history) + "\nBot:"

    # Generate ONCE, then stream the finished text in word-sized chunks.
    # The previous version re-ran the pipeline per chunk with a growing
    # max_new_tokens (up to 8212) and do_sample=True, which was quadratic
    # in cost and resampled a *different* continuation every chunk, so the
    # streamed text kept changing under the user.
    output = pipe(
        context,
        max_new_tokens=512,
        do_sample=True,
        top_p=0.9,
        return_full_text=False,
    )[0]["generated_text"]
    bot_reply = output.split("Bot:")[-1].strip()

    # Stream word by word. "".split(" ") is [""], so even an empty reply
    # yields once and the caller always receives at least one value.
    partial = ""
    for word in bot_reply.split(" "):
        partial = word if not partial else f"{partial} {word}"
        yield partial
        time.sleep(0.05)  # small delay so the UI visibly streams

    history.append(f"Bot: {bot_reply}")
# Gradio interface
with gr.Blocks() as demo:
    chatbot_ui = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type a message...")
    state = gr.State([])  # per-session chat log: list of (user, bot) pairs

    def respond(user_input, chat_history):
        """Stream the bot's reply into the chat window.

        Yields:
            tuple: (chatbot value, updated state) pairs as the reply grows.
        """
        # Append a placeholder row before streaming. The previous version
        # assigned chat_history[-1] on the initially empty gr.State list,
        # which raised IndexError on the very first message.
        chat_history = chat_history + [(user_input, "")]
        for partial in chat_fn_stream(user_input):
            # Replace the placeholder with the latest partial reply.
            chat_history[-1] = (user_input, partial)
            yield chat_history, chat_history

    msg.submit(respond, [msg, state], [chatbot_ui, state])

demo.launch()