ThongCoder committed on
Commit
c19dc01
·
verified ·
1 Parent(s): 5be1376

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -22
app.py CHANGED
@@ -1,42 +1,47 @@
1
  from transformers import pipeline
2
  import gradio as gr
 
3
 
4
- # Load your model with pipeline
5
  pipe = pipeline("text-generation", model="prithivMLmods/rStar-Coder-Qwen3-0.6B")
6
 
7
- # Keep a conversation history
8
  history = []
9
 
10
- def chat_fn(user_input):
11
  global history
12
- # Add user message
13
  history.append(f"User: {user_input}")
14
-
15
- # Combine all previous messages as context
16
  context = "\n".join(history) + "\nBot:"
17
-
18
- # Generate response
19
- output = pipe(context, max_new_tokens=200, do_sample=True, top_p=0.9)[0]['generated_text']
20
-
21
- # Extract only the bot's reply (after last "Bot:")
22
- bot_reply = output.split("Bot:")[-1].strip()
23
-
24
- # Add bot reply to history
 
 
 
 
 
 
25
  history.append(f"Bot: {bot_reply}")
26
-
27
- return bot_reply
28
 
29
  # Gradio interface
30
  with gr.Blocks() as demo:
31
  chatbot_ui = gr.Chatbot()
32
  msg = gr.Textbox(placeholder="Type a message...")
33
-
34
  def respond(user_input, chat_history):
35
- reply = chat_fn(user_input)
36
- chat_history.append((user_input, reply))
37
- return chat_history, chat_history
38
-
39
- state = gr.State([]) # store chat history in Gradio
 
 
 
 
40
  msg.submit(respond, [msg, state], [chatbot_ui, state])
41
 
42
  demo.launch()
 
1
  from transformers import pipeline
2
  import gradio as gr
3
+ import time
4
 
5
+ # Load model
6
  pipe = pipeline("text-generation", model="prithivMLmods/rStar-Coder-Qwen3-0.6B")
7
 
 
8
  history = []
9
 
10
+ def chat_fn_stream(user_input):
11
  global history
 
12
  history.append(f"User: {user_input}")
 
 
13
  context = "\n".join(history) + "\nBot:"
14
+
15
+ # Use a generator for streaming
16
+ for i in range(0, 8192, 20): # fake streaming in chunks
17
+ output = pipe(
18
+ context,
19
+ max_new_tokens=i+20,
20
+ do_sample=True,
21
+ top_p=0.9,
22
+ return_full_text=False
23
+ )[0]['generated_text']
24
+ bot_reply = output.split("Bot:")[-1].strip()
25
+ yield bot_reply # stream partial reply
26
+ time.sleep(0.1) # small delay to simulate streaming
27
+
28
  history.append(f"Bot: {bot_reply}")
 
 
29
 
30
  # Gradio interface
31
  with gr.Blocks() as demo:
32
  chatbot_ui = gr.Chatbot()
33
  msg = gr.Textbox(placeholder="Type a message...")
34
+
35
  def respond(user_input, chat_history):
36
+ # stream output
37
+ bot_reply = ""
38
+ for partial in chat_fn_stream(user_input):
39
+ bot_reply = partial
40
+ # append the latest partial to chat
41
+ chat_history[-1] = (user_input, bot_reply)
42
+ yield chat_history, chat_history
43
+
44
+ state = gr.State([])
45
  msg.submit(respond, [msg, state], [chatbot_ui, state])
46
 
47
  demo.launch()